{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.49919376007799904, "eval_steps": 50, "global_step": 208, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "calibration/aurc": 0.4906669495331548, "calibration/batch_distribution_entropy": 0.2772102231626003, "calibration/batch_entropy_100bins": 0.3506612698092768, "calibration/batch_entropy_10bins": 0.2772102231626003, "calibration/batch_entropy_50bins": 0.40836954215900895, "calibration/batch_uniqueness": 0.5106568545813284, "calibration/confidence_entropy": 0.22095930903324054, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.45256316983360323, "calibration/mean_confidence": 0.9168870637225405, "calibration/prompt_uniqueness": 0.3720936191585115, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.020138888888888908, "completions/max_length": 4041.4, "completions/max_terminated_length": 4041.4, "completions/mean_length": 522.3839477539062, "completions/mean_terminated_length": 533.1372802734375, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.011999850001874977, "grad_norm": 0.0036375881172716618, "learning_rate": 5.952380952380953e-07, "loss": 0.0046, "num_tokens": 9132071.0, "reward": 0.4900794804096222, "reward_std": 0.45279757380485536, "rewards/accuracy_reward": 0.262673607468605, "rewards/brier_reward": 0.31377317309379577, "rewards/confidence_uniqueness_reward": 0.29045385122299194, "rewards/format_reward": 0.6011284708976745, "rewards/frontier_aurc_reward": 0.27633480429649354, "rewards/frontier_coverage_0": 0.27633480429649354, "rewards/frontier_coverage_1": 0.27633480429649354, "rewards/frontier_coverage_10": 0.27633480429649354, "rewards/frontier_coverage_15": 0.27633480429649354, "rewards/frontier_coverage_20": 0.27633480429649354, "rewards/frontier_coverage_25": 0.27633480429649354, "rewards/frontier_coverage_5": 0.27633480429649354, "rewards/frontier_ece_reward": 0.27633480429649354, "rewards/frontier_entropy_batch_reward": -0.575112247467041, "signal/accuracy_reward/centered_abs_mean": 0.3098524272441864, "signal/accuracy_reward/group_bin_occupancy": 0.23923611111111112, "signal/accuracy_reward/group_std_mean": 0.3695395112037659, "signal/accuracy_reward/group_zero_std_frac": 0.08611111268401146, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1549262136220932, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.1549262136220932, "signal/advantage_abs_mean": 0.38927987217903137, "signal/advantage_pre_scale_abs_mean": 0.38927987217903137, "signal/advantage_pre_scale_std": 0.4591569066047668, "signal/advantage_std": 0.4591569066047668, "signal/brier_reward/centered_abs_mean": 0.3203325092792511, "signal/brier_reward/group_bin_occupancy": 0.5138888888888888, "signal/brier_reward/group_std_mean": 0.3733829379081726, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03203325048089027, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.03203325048089027, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.2360519289970398, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.6114583333333333, "signal/confidence_uniqueness_reward/group_std_mean": 0.28973097801208497, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.023605193197727203, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.023605193197727203, "signal/format_reward/centered_abs_mean": 0.4414442241191864, "signal/format_reward/group_bin_occupancy": 0.25, "signal/format_reward/group_std_mean": 0.4756063938140869, "signal/format_reward/group_zero_std_frac": 0.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.2207221120595932, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.2207221120595932, "signal/frontier_aurc_reward/centered_abs_mean": 0.31023464202880857, "signal/frontier_aurc_reward/group_bin_occupancy": 0.39861111111111114, "signal/frontier_aurc_reward/group_std_mean": 0.3682305455207825, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.003877933043986559, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.003877933043986559, "signal/frontier_coverage_0/centered_abs_mean": 0.31023464202880857, "signal/frontier_coverage_0/group_bin_occupancy": 0.39861111111111114, "signal/frontier_coverage_0/group_std_mean": 0.3682305455207825, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003877933043986559, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003877933043986559, "signal/frontier_coverage_1/centered_abs_mean": 0.31023464202880857, "signal/frontier_coverage_1/group_bin_occupancy": 0.39861111111111114, "signal/frontier_coverage_1/group_std_mean": 0.3682305455207825, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003877933043986559, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003877933043986559, "signal/frontier_coverage_10/centered_abs_mean": 0.31023464202880857, "signal/frontier_coverage_10/group_bin_occupancy": 0.39861111111111114, "signal/frontier_coverage_10/group_std_mean": 0.3682305455207825, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003877933043986559, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003877933043986559, "signal/frontier_coverage_15/centered_abs_mean": 0.31023464202880857, "signal/frontier_coverage_15/group_bin_occupancy": 0.39861111111111114, "signal/frontier_coverage_15/group_std_mean": 0.3682305455207825, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003877933043986559, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003877933043986559, "signal/frontier_coverage_20/centered_abs_mean": 0.31023464202880857, "signal/frontier_coverage_20/group_bin_occupancy": 0.39861111111111114, "signal/frontier_coverage_20/group_std_mean": 0.3682305455207825, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003877933043986559, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003877933043986559, "signal/frontier_coverage_25/centered_abs_mean": 0.31023464202880857, "signal/frontier_coverage_25/group_bin_occupancy": 0.39861111111111114, "signal/frontier_coverage_25/group_std_mean": 0.3682305455207825, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003877933043986559, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003877933043986559, "signal/frontier_coverage_5/centered_abs_mean": 0.31023464202880857, "signal/frontier_coverage_5/group_bin_occupancy": 0.39861111111111114, "signal/frontier_coverage_5/group_std_mean": 0.3682305455207825, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003877933043986559, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003877933043986559, "signal/frontier_ece_reward/centered_abs_mean": 0.31023464202880857, "signal/frontier_ece_reward/group_bin_occupancy": 0.39861111111111114, "signal/frontier_ece_reward/group_std_mean": 0.3682305455207825, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.03102346435189247, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.03102346435189247, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.449966561794281, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.30486111111111114, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4825741767883301, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.04499665722250938, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.04499665722250938, "step": 5 }, { "calibration/aurc": 0.5273605183169956, "calibration/batch_distribution_entropy": 0.2604161993663435, "calibration/batch_entropy_100bins": 0.34585173532919666, "calibration/batch_entropy_10bins": 0.2604161993663435, "calibration/batch_entropy_50bins": 0.40165626020405876, "calibration/batch_uniqueness": 0.502415543865784, "calibration/confidence_entropy": 0.22185867612429216, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.054814814814814816, "calibration/coverage@5%": 0.0, "calibration/ece": 0.4804000280177963, "calibration/mean_confidence": 0.9198524716449924, "calibration/prompt_uniqueness": 0.3770142813382146, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.018750000000000024, "completions/max_length": 3941.4, "completions/max_terminated_length": 3941.4, "completions/mean_length": 474.6142333984375, "completions/mean_terminated_length": 483.915966796875, "completions/min_length": 0.0, "completions/min_terminated_length": 3.8, "epoch": 0.023999700003749954, "grad_norm": 0.0029272218234837055, "learning_rate": 1.1904761904761906e-06, "loss": 0.0019, "num_tokens": 17682347.0, "reward": 0.5634824514389039, "reward_std": 0.42838944792747496, "rewards/accuracy_reward": 0.289496523141861, "rewards/brier_reward": 0.35398504734039304, "rewards/confidence_uniqueness_reward": 0.35191494822502134, "rewards/format_reward": 0.7096354126930237, "rewards/frontier_aurc_reward": 0.3055602788925171, "rewards/frontier_coverage_0": 0.3055602788925171, "rewards/frontier_coverage_1": 0.3055602788925171, "rewards/frontier_coverage_10": 0.3055602788925171, "rewards/frontier_coverage_15": 0.3055602788925171, "rewards/frontier_coverage_20": 0.3055602788925171, "rewards/frontier_coverage_25": 0.3055602788925171, "rewards/frontier_coverage_5": 0.3055602788925171, "rewards/frontier_ece_reward": 0.3055602788925171, "rewards/frontier_entropy_batch_reward": -0.6778560400009155, "signal/accuracy_reward/centered_abs_mean": 0.32107747793197633, "signal/accuracy_reward/group_bin_occupancy": 0.2420138888888889, "signal/accuracy_reward/group_std_mean": 0.38136300444602966, "signal/accuracy_reward/group_zero_std_frac": 0.06388889066874981, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.16053873896598816, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.16053873896598816, "signal/advantage_abs_mean": 0.3579964995384216, "signal/advantage_pre_scale_abs_mean": 0.3579964995384216, "signal/advantage_pre_scale_std": 0.4340688169002533, "signal/advantage_std": 0.4340688169002533, "signal/brier_reward/centered_abs_mean": 0.3165506422519684, "signal/brier_reward/group_bin_occupancy": 0.5385416666666667, "signal/brier_reward/group_std_mean": 0.3699568331241608, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03165506534278393, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.03165506534278393, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.22355839908123015, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.611111111111111, "signal/confidence_uniqueness_reward/group_std_mean": 0.27911095023155214, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.022355839610099792, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.022355839610099792, "signal/format_reward/centered_abs_mean": 0.36045463681221007, "signal/format_reward/group_bin_occupancy": 0.24895833333333334, "signal/format_reward/group_std_mean": 0.42311119437217715, "signal/format_reward/group_zero_std_frac": 0.00833333358168602, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.18022731840610504, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.18022731840610504, "signal/frontier_aurc_reward/centered_abs_mean": 0.31446189880371095, "signal/frontier_aurc_reward/group_bin_occupancy": 0.4184027777777778, "signal/frontier_aurc_reward/group_std_mean": 0.3722220480442047, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0039307738188654184, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0039307738188654184, "signal/frontier_coverage_0/centered_abs_mean": 0.31446189880371095, "signal/frontier_coverage_0/group_bin_occupancy": 0.4184027777777778, "signal/frontier_coverage_0/group_std_mean": 0.3722220480442047, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0039307738188654184, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0039307738188654184, "signal/frontier_coverage_1/centered_abs_mean": 0.31446189880371095, "signal/frontier_coverage_1/group_bin_occupancy": 0.4184027777777778, "signal/frontier_coverage_1/group_std_mean": 0.3722220480442047, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0039307738188654184, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0039307738188654184, "signal/frontier_coverage_10/centered_abs_mean": 0.31446189880371095, "signal/frontier_coverage_10/group_bin_occupancy": 0.4184027777777778, "signal/frontier_coverage_10/group_std_mean": 0.3722220480442047, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0039307738188654184, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0039307738188654184, "signal/frontier_coverage_15/centered_abs_mean": 0.31446189880371095, "signal/frontier_coverage_15/group_bin_occupancy": 0.4184027777777778, "signal/frontier_coverage_15/group_std_mean": 0.3722220480442047, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0039307738188654184, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0039307738188654184, "signal/frontier_coverage_20/centered_abs_mean": 0.31446189880371095, "signal/frontier_coverage_20/group_bin_occupancy": 0.4184027777777778, "signal/frontier_coverage_20/group_std_mean": 0.3722220480442047, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0039307738188654184, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0039307738188654184, "signal/frontier_coverage_25/centered_abs_mean": 0.31446189880371095, "signal/frontier_coverage_25/group_bin_occupancy": 0.4184027777777778, "signal/frontier_coverage_25/group_std_mean": 0.3722220480442047, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0039307738188654184, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0039307738188654184, "signal/frontier_coverage_5/centered_abs_mean": 0.31446189880371095, "signal/frontier_coverage_5/group_bin_occupancy": 0.4184027777777778, "signal/frontier_coverage_5/group_std_mean": 0.3722220480442047, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0039307738188654184, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0039307738188654184, "signal/frontier_ece_reward/centered_abs_mean": 0.31446189880371095, "signal/frontier_ece_reward/group_bin_occupancy": 0.4184027777777778, "signal/frontier_ece_reward/group_std_mean": 0.3722220480442047, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.03144619055092335, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.03144619055092335, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.38704427480697634, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.30694444444444446, "signal/frontier_entropy_batch_reward/group_std_mean": 0.444519454240799, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03870442658662796, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03870442658662796, "step": 10 }, { "calibration/aurc": 0.5167585765758467, "calibration/batch_distribution_entropy": 0.302369935218339, "calibration/batch_entropy_100bins": 0.36347179220282333, "calibration/batch_entropy_10bins": 0.302369935218339, "calibration/batch_entropy_50bins": 0.422450421929475, "calibration/batch_uniqueness": 0.5326884584432358, "calibration/confidence_entropy": 0.2414202213951906, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.07401129943502824, "calibration/coverage@5%": 0.0, "calibration/ece": 0.49241807139440097, "calibration/mean_confidence": 0.9089811337755254, "calibration/prompt_uniqueness": 0.43205592433820234, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009461805555555536, "completions/max_length": 3884.0, "completions/max_terminated_length": 3884.0, "completions/mean_length": 432.0356872558594, "completions/mean_terminated_length": 436.19459228515626, "completions/min_length": 0.0, "completions/min_terminated_length": 47.2, "epoch": 0.03599955000562493, "grad_norm": 0.0013383845798671246, "learning_rate": 1.7857142857142859e-06, "loss": -0.0098, "num_tokens": 25761382.0, "reward": 0.7086469292640686, "reward_std": 0.34355844259262086, "rewards/accuracy_reward": 0.33203125, "rewards/brier_reward": 0.43671444058418274, "rewards/confidence_uniqueness_reward": 0.5003645718097687, "rewards/format_reward": 0.9299479365348816, "rewards/frontier_aurc_reward": 0.36155037879943847, "rewards/frontier_coverage_0": 0.36155037879943847, "rewards/frontier_coverage_1": 0.36155037879943847, "rewards/frontier_coverage_10": 0.36155037879943847, "rewards/frontier_coverage_15": 0.36155037879943847, "rewards/frontier_coverage_20": 0.36155037879943847, "rewards/frontier_coverage_25": 0.36155037879943847, "rewards/frontier_coverage_5": 0.36155037879943847, "rewards/frontier_ece_reward": 0.36155037879943847, "rewards/frontier_entropy_batch_reward": -0.8836066842079162, "signal/accuracy_reward/centered_abs_mean": 0.320947265625, "signal/accuracy_reward/group_bin_occupancy": 0.23958333333333334, "signal/accuracy_reward/group_std_mean": 0.3797557592391968, "signal/accuracy_reward/group_zero_std_frac": 0.08333333507180214, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1604736328125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.1604736328125, "signal/advantage_abs_mean": 0.2800030291080475, "signal/advantage_pre_scale_abs_mean": 0.2800030291080475, "signal/advantage_pre_scale_std": 0.35208467245101926, "signal/advantage_std": 0.35208467245101926, "signal/brier_reward/centered_abs_mean": 0.30012611150741575, "signal/brier_reward/group_bin_occupancy": 0.6045138888888888, "signal/brier_reward/group_std_mean": 0.3523735284805298, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03001261092722416, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.03001261092722416, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.18756819367408753, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.6347222222222222, "signal/confidence_uniqueness_reward/group_std_mean": 0.2359054923057556, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.018756820634007455, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.018756820634007455, "signal/format_reward/centered_abs_mean": 0.1186360664665699, "signal/format_reward/group_bin_occupancy": 0.215625, "signal/format_reward/group_std_mean": 0.2005244880914688, "signal/format_reward/group_zero_std_frac": 0.27500001192092893, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.05931803323328495, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.05931803323328495, "signal/frontier_aurc_reward/centered_abs_mean": 0.3130494236946106, "signal/frontier_aurc_reward/group_bin_occupancy": 0.4802083333333334, "signal/frontier_aurc_reward/group_std_mean": 0.36802791357040404, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.003913117619231343, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.003913117619231343, "signal/frontier_coverage_0/centered_abs_mean": 0.3130494236946106, "signal/frontier_coverage_0/group_bin_occupancy": 0.4802083333333334, "signal/frontier_coverage_0/group_std_mean": 0.36802791357040404, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003913117619231343, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003913117619231343, "signal/frontier_coverage_1/centered_abs_mean": 0.3130494236946106, "signal/frontier_coverage_1/group_bin_occupancy": 0.4802083333333334, "signal/frontier_coverage_1/group_std_mean": 0.36802791357040404, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003913117619231343, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003913117619231343, "signal/frontier_coverage_10/centered_abs_mean": 0.3130494236946106, "signal/frontier_coverage_10/group_bin_occupancy": 0.4802083333333334, "signal/frontier_coverage_10/group_std_mean": 0.36802791357040404, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003913117619231343, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003913117619231343, "signal/frontier_coverage_15/centered_abs_mean": 0.3130494236946106, "signal/frontier_coverage_15/group_bin_occupancy": 0.4802083333333334, "signal/frontier_coverage_15/group_std_mean": 0.36802791357040404, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003913117619231343, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003913117619231343, "signal/frontier_coverage_20/centered_abs_mean": 0.3130494236946106, "signal/frontier_coverage_20/group_bin_occupancy": 0.4802083333333334, "signal/frontier_coverage_20/group_std_mean": 0.36802791357040404, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003913117619231343, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003913117619231343, "signal/frontier_coverage_25/centered_abs_mean": 0.3130494236946106, "signal/frontier_coverage_25/group_bin_occupancy": 0.4802083333333334, "signal/frontier_coverage_25/group_std_mean": 0.36802791357040404, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003913117619231343, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003913117619231343, "signal/frontier_coverage_5/centered_abs_mean": 0.3130494236946106, "signal/frontier_coverage_5/group_bin_occupancy": 0.4802083333333334, "signal/frontier_coverage_5/group_std_mean": 0.36802791357040404, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003913117619231343, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003913117619231343, "signal/frontier_ece_reward/centered_abs_mean": 0.3130494236946106, "signal/frontier_ece_reward/group_bin_occupancy": 0.4802083333333334, "signal/frontier_ece_reward/group_std_mean": 0.36802791357040404, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.031304940953850745, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.031304940953850745, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.19124604463577272, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.3173611111111111, "signal/frontier_entropy_batch_reward/group_std_mean": 0.2992013156414032, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.07500000167638063, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.019124605879187583, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.019124605879187583, "step": 15 }, { "calibration/aurc": 0.46943229603801095, "calibration/batch_distribution_entropy": 0.39007867356490084, "calibration/batch_entropy_100bins": 0.395942880351314, "calibration/batch_entropy_10bins": 0.39007867356490084, "calibration/batch_entropy_50bins": 0.4590101472703655, "calibration/batch_uniqueness": 0.5824014613826816, "calibration/buffer_distribution_entropy": 0.3087817459654445, "calibration/buffer_entropy_100bins": 0.3742379471182134, "calibration/buffer_entropy_10bins": 0.3087817459654445, "calibration/buffer_entropy_50bins": 0.4334464532283356, "calibration/confidence_entropy": 0.3112217014467542, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.021409921671018274, "calibration/coverage@30%": 0.042631853785900786, "calibration/coverage@5%": 0.0, "calibration/ece": 0.396358403667547, "calibration/mean_confidence": 0.8837364281993014, "calibration/prompt_uniqueness": 0.5032654060971099, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011111111111111117, "completions/max_length": 3641.8, "completions/max_terminated_length": 3641.8, "completions/mean_length": 448.12864990234374, "completions/mean_terminated_length": 453.1479919433594, "completions/min_length": 0.0, "completions/min_terminated_length": 81.4, "epoch": 0.04799940000749991, "grad_norm": 0.0010318111162632704, "learning_rate": 2.380952380952381e-06, "loss": -0.0095, "num_tokens": 34037520.0, "reward": 0.7606997966766358, "reward_std": 0.268657323718071, "rewards/accuracy_reward": 0.43125, "rewards/brier_reward": 0.547679090499878, "rewards/confidence_uniqueness_reward": 0.5864130258560181, "rewards/format_reward": 0.9827256917953491, "rewards/frontier_aurc_reward": 0.17052557989954947, "rewards/frontier_coverage_0": 0.1809873386286199, "rewards/frontier_coverage_1": 0.1809873386286199, "rewards/frontier_coverage_10": 0.1809873386286199, "rewards/frontier_coverage_15": 0.1809873386286199, "rewards/frontier_coverage_20": 0.1809873386286199, "rewards/frontier_coverage_25": 0.1809873386286199, "rewards/frontier_coverage_5": 0.1809873386286199, "rewards/frontier_ece_reward": 0.16180366985499858, "rewards/frontier_entropy_batch_reward": -0.9384560346603393, "signal/accuracy_reward/centered_abs_mean": 0.3011718809604645, "signal/accuracy_reward/group_bin_occupancy": 0.24305555555555558, "signal/accuracy_reward/group_std_mean": 0.37031384706497195, "signal/accuracy_reward/group_zero_std_frac": 0.055555556900799274, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.15058594048023224, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.15058594048023224, "signal/advantage_abs_mean": 0.21566397547721863, "signal/advantage_pre_scale_abs_mean": 0.21566397547721863, "signal/advantage_pre_scale_std": 0.2766494989395142, "signal/advantage_std": 0.2766494989395142, "signal/brier_reward/centered_abs_mean": 0.26345101892948153, "signal/brier_reward/group_bin_occupancy": 0.64375, "signal/brier_reward/group_std_mean": 0.3199512481689453, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.026345102488994597, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.026345102488994597, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.18284115493297576, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.6225694444444445, "signal/confidence_uniqueness_reward/group_std_mean": 0.220096555352211, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.018284116685390473, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.018284116685390473, "signal/format_reward/centered_abs_mean": 0.03140733540058136, "signal/format_reward/group_bin_occupancy": 0.16180555555555554, "signal/format_reward/group_std_mean": 0.06625646576285363, "signal/format_reward/group_zero_std_frac": 0.7055555582046509, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01570366770029068, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01570366770029068, "signal/frontier_aurc_reward/centered_abs_mean": 0.11926614781841635, "signal/frontier_aurc_reward/group_bin_occupancy": 0.632986111111111, "signal/frontier_aurc_reward/group_std_mean": 0.14433029675856232, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.001490826773806475, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.001490826773806475, "signal/frontier_coverage_0/centered_abs_mean": 0.13713490664958955, "signal/frontier_coverage_0/group_bin_occupancy": 0.6166666666666667, "signal/frontier_coverage_0/group_std_mean": 0.17561094984412193, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0017141862597782164, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0017141862597782164, "signal/frontier_coverage_1/centered_abs_mean": 0.13713490664958955, "signal/frontier_coverage_1/group_bin_occupancy": 0.6166666666666667, "signal/frontier_coverage_1/group_std_mean": 0.17561094984412193, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0017141862597782164, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0017141862597782164, "signal/frontier_coverage_10/centered_abs_mean": 0.13713490664958955, "signal/frontier_coverage_10/group_bin_occupancy": 0.6166666666666667, "signal/frontier_coverage_10/group_std_mean": 0.17561094984412193, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0017141862597782164, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017141862597782164, "signal/frontier_coverage_15/centered_abs_mean": 0.13713490664958955, "signal/frontier_coverage_15/group_bin_occupancy": 0.6166666666666667, "signal/frontier_coverage_15/group_std_mean": 0.17561094984412193, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0017141862597782164, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0017141862597782164, "signal/frontier_coverage_20/centered_abs_mean": 0.13713490664958955, "signal/frontier_coverage_20/group_bin_occupancy": 0.6166666666666667, "signal/frontier_coverage_20/group_std_mean": 0.17561094984412193, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0017141862597782164, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0017141862597782164, "signal/frontier_coverage_25/centered_abs_mean": 0.13713490664958955, "signal/frontier_coverage_25/group_bin_occupancy": 0.6166666666666667, "signal/frontier_coverage_25/group_std_mean": 0.17561094984412193, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0017141862597782164, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017141862597782164, "signal/frontier_coverage_5/centered_abs_mean": 0.13713490664958955, "signal/frontier_coverage_5/group_bin_occupancy": 0.6166666666666667, "signal/frontier_coverage_5/group_std_mean": 0.17561094984412193, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0017141862597782164, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0017141862597782164, "signal/frontier_ece_reward/centered_abs_mean": 0.209702330827713, "signal/frontier_ece_reward/group_bin_occupancy": 0.4791666666666667, "signal/frontier_ece_reward/group_std_mean": 0.25489507615566254, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.02097023241221905, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.02097023241221905, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.10760662704706192, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.27881944444444445, "signal/frontier_entropy_batch_reward/group_std_mean": 0.2045659214258194, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.2472222238779068, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.010760662704706192, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.010760662704706192, "step": 20 }, { "calibration/aurc": 0.36227726815472583, "calibration/batch_distribution_entropy": 0.5686667040639641, "calibration/batch_entropy_100bins": 0.46015330431227275, "calibration/batch_entropy_10bins": 0.5686667040639641, "calibration/batch_entropy_50bins": 0.5345691542936407, "calibration/batch_uniqueness": 0.7103172249744851, "calibration/buffer_distribution_entropy": 0.3577538162654611, "calibration/buffer_entropy_100bins": 0.3942461572372418, "calibration/buffer_entropy_10bins": 0.3577538162654611, "calibration/buffer_entropy_50bins": 0.45707372334068264, "calibration/confidence_entropy": 0.38490834959662473, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.04647519582245431, "calibration/coverage@20%": 0.1167582795107874, "calibration/coverage@25%": 0.23117356053318674, "calibration/coverage@30%": 0.4, "calibration/coverage@5%": 0.0, "calibration/ece": 0.24973699229306895, "calibration/mean_confidence": 0.8392709913535077, "calibration/prompt_uniqueness": 0.6169621770560031, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010850694444444442, "completions/max_length": 3946.0, "completions/max_terminated_length": 3946.0, "completions/mean_length": 506.2393310546875, "completions/mean_terminated_length": 511.80390625, "completions/min_length": 0.0, "completions/min_terminated_length": 104.0, "epoch": 0.05999925000937488, "grad_norm": 0.0006832340732216835, "learning_rate": 2.9761904761904763e-06, "loss": -0.0061, "num_tokens": 42993845.0, "reward": 0.8102578759193421, "reward_std": 0.21867357492446898, "rewards/accuracy_reward": 0.5464409768581391, "rewards/brier_reward": 0.6634011149406434, "rewards/confidence_uniqueness_reward": 0.6947197318077087, "rewards/format_reward": 0.9862847328186035, "rewards/frontier_aurc_reward": -0.004134428594261408, "rewards/frontier_coverage_0": 0.0019480698741972447, "rewards/frontier_coverage_1": 0.0019480698741972447, "rewards/frontier_coverage_10": 0.0019480698741972447, "rewards/frontier_coverage_15": 0.0019480698741972447, "rewards/frontier_coverage_20": 0.0019480698741972447, "rewards/frontier_coverage_25": 0.0019480698741972447, "rewards/frontier_coverage_5": 0.0019480698741972447, "rewards/frontier_ece_reward": 0.022111652628518642, "rewards/frontier_entropy_batch_reward": -0.9424700856208801, "signal/accuracy_reward/centered_abs_mean": 0.27006836533546447, "signal/accuracy_reward/group_bin_occupancy": 0.23680555555555555, "signal/accuracy_reward/group_std_mean": 0.3380339086055756, "signal/accuracy_reward/group_zero_std_frac": 0.10555555671453476, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.13503418266773223, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.13503418266773223, "signal/advantage_abs_mean": 0.17086843848228456, "signal/advantage_pre_scale_abs_mean": 0.17086843848228456, "signal/advantage_pre_scale_std": 0.2329561173915863, "signal/advantage_std": 0.2329561173915863, "signal/brier_reward/centered_abs_mean": 0.20691562294960023, "signal/brier_reward/group_bin_occupancy": 0.7083333333333335, "signal/brier_reward/group_std_mean": 0.25875782668590547, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020691563189029694, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.020691563189029694, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.1167220115661621, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.6739583333333334, "signal/confidence_uniqueness_reward/group_std_mean": 0.14770560711622238, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.011672201752662658, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.011672201752662658, "signal/format_reward/centered_abs_mean": 0.024240451492369176, "signal/format_reward/group_bin_occupancy": 0.14965277777777777, "signal/format_reward/group_std_mean": 0.04735830463469028, "signal/format_reward/group_zero_std_frac": 0.8027777910232544, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.012120225746184588, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.012120225746184588, "signal/frontier_aurc_reward/centered_abs_mean": 0.0027726517990231516, "signal/frontier_aurc_reward/group_bin_occupancy": 0.7083333333333334, "signal/frontier_aurc_reward/group_std_mean": 0.00418220111168921, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.4658145887078716e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.4658145887078716e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.04216930866241455, "signal/frontier_coverage_0/group_bin_occupancy": 0.7506944444444444, "signal/frontier_coverage_0/group_std_mean": 0.06601626127958297, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0005271163769066334, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0005271163769066334, "signal/frontier_coverage_1/centered_abs_mean": 0.04216930866241455, "signal/frontier_coverage_1/group_bin_occupancy": 0.7506944444444444, "signal/frontier_coverage_1/group_std_mean": 0.06601626127958297, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0005271163769066334, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0005271163769066334, "signal/frontier_coverage_10/centered_abs_mean": 0.04216930866241455, "signal/frontier_coverage_10/group_bin_occupancy": 0.7506944444444444, "signal/frontier_coverage_10/group_std_mean": 0.06601626127958297, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0005271163769066334, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0005271163769066334, "signal/frontier_coverage_15/centered_abs_mean": 0.04216930866241455, "signal/frontier_coverage_15/group_bin_occupancy": 0.7506944444444444, "signal/frontier_coverage_15/group_std_mean": 0.06601626127958297, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0005271163769066334, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0005271163769066334, "signal/frontier_coverage_20/centered_abs_mean": 0.04216930866241455, "signal/frontier_coverage_20/group_bin_occupancy": 0.7506944444444444, "signal/frontier_coverage_20/group_std_mean": 0.06601626127958297, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0005271163769066334, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0005271163769066334, "signal/frontier_coverage_25/centered_abs_mean": 0.04216930866241455, "signal/frontier_coverage_25/group_bin_occupancy": 0.7506944444444444, "signal/frontier_coverage_25/group_std_mean": 0.06601626127958297, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0005271163769066334, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0005271163769066334, "signal/frontier_coverage_5/centered_abs_mean": 0.04216930866241455, "signal/frontier_coverage_5/group_bin_occupancy": 0.7506944444444444, "signal/frontier_coverage_5/group_std_mean": 0.06601626127958297, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0005271163769066334, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0005271163769066334, "signal/frontier_ece_reward/centered_abs_mean": 0.11826423108577729, "signal/frontier_ece_reward/group_bin_occupancy": 0.6211805555555556, "signal/frontier_ece_reward/group_std_mean": 0.14597586840391158, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.011826423183083534, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.011826423183083534, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.10026835501194001, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.2576388888888889, "signal/frontier_entropy_batch_reward/group_std_mean": 0.20214761793613434, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.27777778208255766, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.010026836022734641, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.010026836022734641, "step": 25 }, { "calibration/aurc": 0.28030120111462953, "calibration/batch_distribution_entropy": 0.6860934029668021, "calibration/batch_entropy_100bins": 0.47509042724861394, "calibration/batch_entropy_10bins": 0.6860934029668021, "calibration/batch_entropy_50bins": 0.5571003799016714, "calibration/batch_uniqueness": 0.7315819333475163, "calibration/buffer_distribution_entropy": 0.44782185406696956, "calibration/buffer_entropy_100bins": 0.432568095461576, "calibration/buffer_entropy_10bins": 0.44782185406696956, "calibration/buffer_entropy_50bins": 0.5016514349890427, "calibration/confidence_entropy": 0.5032410205898324, "calibration/coverage@0%": 0.0020942408376963353, "calibration/coverage@1%": 0.0020942408376963353, "calibration/coverage@10%": 0.019895287958115182, "calibration/coverage@15%": 0.02722513089005236, "calibration/coverage@20%": 0.13418440334318799, "calibration/coverage@25%": 0.35231416191675474, "calibration/coverage@30%": 0.7237647681307741, "calibration/coverage@5%": 0.0020942408376963353, "calibration/ece": 0.12329795557525347, "calibration/mean_confidence": 0.7611700501923784, "calibration/prompt_uniqueness": 0.6430085674820394, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01753472222222221, "completions/max_length": 4004.2, "completions/max_terminated_length": 4004.2, "completions/mean_length": 587.9065307617187, "completions/mean_terminated_length": 598.4256591796875, "completions/min_length": 0.0, "completions/min_terminated_length": 129.6, "epoch": 0.07199910001124986, "grad_norm": 0.0004425600345712155, "learning_rate": 3.5714285714285718e-06, "loss": -0.0096, "num_tokens": 52876448.0, "reward": 0.8403311133384704, "reward_std": 0.19080861508846284, "rewards/accuracy_reward": 0.5989583373069763, "rewards/brier_reward": 0.7185598731040954, "rewards/confidence_uniqueness_reward": 0.7107228398323059, "rewards/format_reward": 0.9805555701255798, "rewards/frontier_aurc_reward": -0.0033631839789450167, "rewards/frontier_coverage_0": -0.010212953144218773, "rewards/frontier_coverage_1": -0.010212953144218773, "rewards/frontier_coverage_10": -0.010212953144218773, "rewards/frontier_coverage_15": -0.010212953144218773, "rewards/frontier_coverage_20": -0.010212953144218773, "rewards/frontier_coverage_25": -0.010212953144218773, "rewards/frontier_coverage_5": -0.010212953144218773, "rewards/frontier_ece_reward": 0.026352604478597642, "rewards/frontier_entropy_batch_reward": -0.9405368328094482, "signal/accuracy_reward/centered_abs_mean": 0.23267143666744233, "signal/accuracy_reward/group_bin_occupancy": 0.225, "signal/accuracy_reward/group_std_mean": 0.29561176896095276, "signal/accuracy_reward/group_zero_std_frac": 0.2, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.11633571833372117, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.11633571833372117, "signal/advantage_abs_mean": 0.1454018846154213, "signal/advantage_pre_scale_abs_mean": 0.1454018846154213, "signal/advantage_pre_scale_std": 0.21304741203784944, "signal/advantage_std": 0.21304741203784944, "signal/brier_reward/centered_abs_mean": 0.16264356970787047, "signal/brier_reward/group_bin_occupancy": 0.7659722222222223, "signal/brier_reward/group_std_mean": 0.20704346001148224, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016264356672763824, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.016264356672763824, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.11230488270521163, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.6600694444444444, "signal/confidence_uniqueness_reward/group_std_mean": 0.1442235678434372, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.011230488680303097, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.011230488680303097, "signal/format_reward/centered_abs_mean": 0.03282335102558136, "signal/format_reward/group_bin_occupancy": 0.15729166666666666, "signal/format_reward/group_std_mean": 0.061944124102592465, "signal/format_reward/group_zero_std_frac": 0.7416666626930237, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01641167551279068, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01641167551279068, "signal/frontier_aurc_reward/centered_abs_mean": 0.0016978590982034802, "signal/frontier_aurc_reward/group_bin_occupancy": 0.7225694444444445, "signal/frontier_aurc_reward/group_std_mean": 0.0026338005904108287, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.1223240401013756e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.1223240401013756e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.058112889528274536, "signal/frontier_coverage_0/group_bin_occupancy": 0.8069444444444445, "signal/frontier_coverage_0/group_std_mean": 0.0808319017291069, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0007264111656695605, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0007264111656695605, "signal/frontier_coverage_1/centered_abs_mean": 0.058112889528274536, "signal/frontier_coverage_1/group_bin_occupancy": 0.8069444444444445, "signal/frontier_coverage_1/group_std_mean": 0.0808319017291069, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0007264111656695605, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0007264111656695605, "signal/frontier_coverage_10/centered_abs_mean": 0.058112889528274536, "signal/frontier_coverage_10/group_bin_occupancy": 0.8069444444444445, "signal/frontier_coverage_10/group_std_mean": 0.0808319017291069, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0007264111656695605, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0007264111656695605, "signal/frontier_coverage_15/centered_abs_mean": 0.058112889528274536, "signal/frontier_coverage_15/group_bin_occupancy": 0.8069444444444445, "signal/frontier_coverage_15/group_std_mean": 0.0808319017291069, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0007264111656695605, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0007264111656695605, "signal/frontier_coverage_20/centered_abs_mean": 0.058112889528274536, "signal/frontier_coverage_20/group_bin_occupancy": 0.8069444444444445, "signal/frontier_coverage_20/group_std_mean": 0.0808319017291069, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0007264111656695605, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0007264111656695605, "signal/frontier_coverage_25/centered_abs_mean": 0.058112889528274536, "signal/frontier_coverage_25/group_bin_occupancy": 0.8069444444444445, "signal/frontier_coverage_25/group_std_mean": 0.0808319017291069, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007264111656695605, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007264111656695605, "signal/frontier_coverage_5/centered_abs_mean": 0.058112889528274536, "signal/frontier_coverage_5/group_bin_occupancy": 0.8069444444444445, "signal/frontier_coverage_5/group_std_mean": 0.0808319017291069, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0007264111656695605, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0007264111656695605, "signal/frontier_ece_reward/centered_abs_mean": 0.07377360388636589, "signal/frontier_ece_reward/group_bin_occupancy": 0.7083333333333334, "signal/frontier_ece_reward/group_std_mean": 0.09336973130702972, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.007377360574901104, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.007377360574901104, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.10462483614683152, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.2517361111111111, "signal/frontier_entropy_batch_reward/group_std_mean": 0.2067155808210373, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.2888888955116272, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.010462483763694764, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.010462483763694764, "step": 30 }, { "calibration/aurc": 0.2571494357039368, "calibration/batch_distribution_entropy": 0.6915737521952584, "calibration/batch_entropy_100bins": 0.49647134799940157, "calibration/batch_entropy_10bins": 0.6915737521952584, "calibration/batch_entropy_50bins": 0.5702481285621201, "calibration/batch_uniqueness": 0.7275370635688326, "calibration/buffer_distribution_entropy": 0.5412420769857675, "calibration/buffer_entropy_100bins": 0.47152686196123883, "calibration/buffer_entropy_10bins": 0.5412420769857675, "calibration/buffer_entropy_50bins": 0.5471484481600025, "calibration/confidence_entropy": 0.5555159358951477, "calibration/coverage@0%": 0.0021108982484691493, "calibration/coverage@1%": 0.0021108982484691493, "calibration/coverage@10%": 0.05757886667151095, "calibration/coverage@15%": 0.10641635561934473, "calibration/coverage@20%": 0.2736149765303272, "calibration/coverage@25%": 0.5459915878765254, "calibration/coverage@30%": 0.8, "calibration/coverage@5%": 0.01375110988868079, "calibration/ece": 0.09159402045781669, "calibration/mean_confidence": 0.7156475196151146, "calibration/prompt_uniqueness": 0.6363292257990881, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.019791666666666673, "completions/max_length": 3775.4, "completions/max_terminated_length": 3775.4, "completions/mean_length": 639.6553955078125, "completions/mean_terminated_length": 652.6179931640625, "completions/min_length": 0.0, "completions/min_terminated_length": 195.2, "epoch": 0.08399895001312484, "grad_norm": 0.0005004682461731136, "learning_rate": 4.166666666666667e-06, "loss": -0.0115, "num_tokens": 63322718.0, "reward": 0.8644884347915649, "reward_std": 0.17564767897129058, "rewards/accuracy_reward": 0.6354166746139527, "rewards/brier_reward": 0.7466939330101013, "rewards/confidence_uniqueness_reward": 0.7118972659111023, "rewards/format_reward": 0.9786458373069763, "rewards/frontier_aurc_reward": -0.002773157227784395, "rewards/frontier_coverage_0": -0.022431935556232928, "rewards/frontier_coverage_1": -0.022431935556232928, "rewards/frontier_coverage_10": -0.022431935556232928, "rewards/frontier_coverage_15": -0.022431935556232928, "rewards/frontier_coverage_20": -0.022431935556232928, "rewards/frontier_coverage_25": -0.022431935556232928, "rewards/frontier_coverage_5": -0.022431935556232928, "rewards/frontier_ece_reward": 0.019234086759388445, "rewards/frontier_entropy_batch_reward": -0.8832790136337281, "signal/accuracy_reward/centered_abs_mean": 0.20523003339767457, "signal/accuracy_reward/group_bin_occupancy": 0.21597222222222223, "signal/accuracy_reward/group_std_mean": 0.26482152938842773, "signal/accuracy_reward/group_zero_std_frac": 0.272222226858139, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.10261501669883728, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.10261501669883728, "signal/advantage_abs_mean": 0.13137987852096558, "signal/advantage_pre_scale_abs_mean": 0.13137987852096558, "signal/advantage_pre_scale_std": 0.1967354714870453, "signal/advantage_std": 0.1967354714870453, "signal/brier_reward/centered_abs_mean": 0.13508679270744323, "signal/brier_reward/group_bin_occupancy": 0.804861111111111, "signal/brier_reward/group_std_mean": 0.1748790144920349, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013508679158985615, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013508679158985615, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.132964688539505, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.6645833333333334, "signal/confidence_uniqueness_reward/group_std_mean": 0.16216650009155273, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.013296469673514366, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.013296469673514366, "signal/format_reward/centered_abs_mean": 0.03264973983168602, "signal/format_reward/group_bin_occupancy": 0.15104166666666669, "signal/format_reward/group_std_mean": 0.055517496168613435, "signal/format_reward/group_zero_std_frac": 0.7916666865348816, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01632486991584301, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01632486991584301, "signal/frontier_aurc_reward/centered_abs_mean": 0.001194856408983469, "signal/frontier_aurc_reward/group_bin_occupancy": 0.7513888888888889, "signal/frontier_aurc_reward/group_std_mean": 0.0018525759922340512, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.4935705621610396e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.4935705621610396e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.07465948313474655, "signal/frontier_coverage_0/group_bin_occupancy": 0.8434027777777777, "signal/frontier_coverage_0/group_std_mean": 0.09974148273468017, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0009332435904070735, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0009332435904070735, "signal/frontier_coverage_1/centered_abs_mean": 0.07465948313474655, "signal/frontier_coverage_1/group_bin_occupancy": 0.8434027777777777, "signal/frontier_coverage_1/group_std_mean": 0.09974148273468017, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0009332435904070735, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0009332435904070735, "signal/frontier_coverage_10/centered_abs_mean": 0.07465948313474655, "signal/frontier_coverage_10/group_bin_occupancy": 0.8434027777777777, "signal/frontier_coverage_10/group_std_mean": 0.09974148273468017, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0009332435904070735, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0009332435904070735, "signal/frontier_coverage_15/centered_abs_mean": 0.07465948313474655, "signal/frontier_coverage_15/group_bin_occupancy": 0.8434027777777777, "signal/frontier_coverage_15/group_std_mean": 0.09974148273468017, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0009332435904070735, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009332435904070735, "signal/frontier_coverage_20/centered_abs_mean": 0.07465948313474655, "signal/frontier_coverage_20/group_bin_occupancy": 0.8434027777777777, "signal/frontier_coverage_20/group_std_mean": 0.09974148273468017, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009332435904070735, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009332435904070735, "signal/frontier_coverage_25/centered_abs_mean": 0.07465948313474655, "signal/frontier_coverage_25/group_bin_occupancy": 0.8434027777777777, "signal/frontier_coverage_25/group_std_mean": 0.09974148273468017, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009332435904070735, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009332435904070735, "signal/frontier_coverage_5/centered_abs_mean": 0.07465948313474655, "signal/frontier_coverage_5/group_bin_occupancy": 0.8434027777777777, "signal/frontier_coverage_5/group_std_mean": 0.09974148273468017, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0009332435904070735, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0009332435904070735, "signal/frontier_ece_reward/centered_abs_mean": 0.045828448981046675, "signal/frontier_ece_reward/group_bin_occupancy": 0.5989583333333334, "signal/frontier_ece_reward/group_std_mean": 0.062764922529459, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00458284504711628, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00458284504711628, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.19048750698566436, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.34965277777777776, "signal/frontier_entropy_batch_reward/group_std_mean": 0.31525389552116395, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.10000000093132258, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.019048751518130302, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.019048751518130302, "step": 35 }, { "calibration/aurc": 0.3167568910312298, "calibration/batch_distribution_entropy": 0.8158753007650332, "calibration/batch_entropy_100bins": 0.7754491287714476, "calibration/batch_entropy_10bins": 0.8158753007650332, "calibration/batch_entropy_50bins": 0.8143281422196861, "calibration/batch_uniqueness": 0.8912077436325131, "calibration/buffer_distribution_entropy": 0.6146759941471388, "calibration/buffer_entropy_100bins": 0.523218368594508, "calibration/buffer_entropy_10bins": 0.6146759941471388, "calibration/buffer_entropy_50bins": 0.6006597501001077, "calibration/confidence_entropy": 0.5871514805879432, "calibration/coverage@0%": 0.009528019872847458, "calibration/coverage@1%": 0.009528019872847458, "calibration/coverage@10%": 0.012702623047450633, "calibration/coverage@15%": 0.025002088288092344, "calibration/coverage@20%": 0.07057350282502413, "calibration/coverage@25%": 0.3320165297852925, "calibration/coverage@30%": 0.4634608198502722, "calibration/coverage@5%": 0.012702623047450633, "calibration/ece": 0.14937205173281268, "calibration/mean_confidence": 0.6103869655080439, "calibration/prompt_uniqueness": 0.8211696893763782, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.020572916666666673, "completions/max_length": 3653.4, "completions/max_terminated_length": 3653.4, "completions/mean_length": 677.6016479492188, "completions/mean_terminated_length": 691.8317626953125, "completions/min_length": 0.0, "completions/min_terminated_length": 219.6, "epoch": 0.09599880001499982, "grad_norm": 0.0004252003855071962, "learning_rate": 4.761904761904762e-06, "loss": -0.0151, "num_tokens": 74248209.0, "reward": 0.9098719239234925, "reward_std": 0.170660662651062, "rewards/accuracy_reward": 0.6442708253860474, "rewards/brier_reward": 0.7366751790046692, "rewards/confidence_uniqueness_reward": 0.8783312201499939, "rewards/format_reward": 0.977343738079071, "rewards/frontier_aurc_reward": -0.0024696006905287502, "rewards/frontier_coverage_0": -0.045524665340781215, "rewards/frontier_coverage_1": -0.045524665340781215, "rewards/frontier_coverage_10": -0.045524665340781215, "rewards/frontier_coverage_15": -0.045524665340781215, "rewards/frontier_coverage_20": -0.045524665340781215, "rewards/frontier_coverage_25": -0.045524665340781215, "rewards/frontier_coverage_5": -0.045524665340781215, "rewards/frontier_ece_reward": 0.00879600141197443, "rewards/frontier_entropy_batch_reward": -0.5930132269859314, "signal/accuracy_reward/centered_abs_mean": 0.18386501371860503, "signal/accuracy_reward/group_bin_occupancy": 0.21423611111111113, "signal/accuracy_reward/group_std_mean": 0.24782910346984863, "signal/accuracy_reward/group_zero_std_frac": 0.286111119389534, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09193250685930252, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09193250685930252, "signal/advantage_abs_mean": 0.125397390127182, "signal/advantage_pre_scale_abs_mean": 0.125397390127182, "signal/advantage_pre_scale_std": 0.19242337942123414, "signal/advantage_std": 0.19242337942123414, "signal/brier_reward/centered_abs_mean": 0.14977407157421113, "signal/brier_reward/group_bin_occupancy": 0.88125, "signal/brier_reward/group_std_mean": 0.19257045090198516, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014977407827973365, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014977407827973365, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.07975448668003082, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.7291666666666667, "signal/confidence_uniqueness_reward/group_std_mean": 0.11011579483747483, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007975448574870824, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007975448574870824, "signal/format_reward/centered_abs_mean": 0.03768988735973835, "signal/format_reward/group_bin_occupancy": 0.15659722222222222, "signal/format_reward/group_std_mean": 0.06584622710943222, "signal/format_reward/group_zero_std_frac": 0.7472222328186036, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.018844943679869174, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.018844943679869174, "signal/frontier_aurc_reward/centered_abs_mean": 0.0011158839566633104, "signal/frontier_aurc_reward/group_bin_occupancy": 0.7399305555555556, "signal/frontier_aurc_reward/group_std_mean": 0.0017659143777564168, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.3948549531050958e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.3948549531050958e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.13970998972654342, "signal/frontier_coverage_0/group_bin_occupancy": 0.8805555555555558, "signal/frontier_coverage_0/group_std_mean": 0.18841452598571778, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0017463748808950186, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0017463748808950186, "signal/frontier_coverage_1/centered_abs_mean": 0.13970998972654342, "signal/frontier_coverage_1/group_bin_occupancy": 0.8805555555555558, "signal/frontier_coverage_1/group_std_mean": 0.18841452598571778, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0017463748808950186, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0017463748808950186, "signal/frontier_coverage_10/centered_abs_mean": 0.13970998972654342, "signal/frontier_coverage_10/group_bin_occupancy": 0.8805555555555558, "signal/frontier_coverage_10/group_std_mean": 0.18841452598571778, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0017463748808950186, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017463748808950186, "signal/frontier_coverage_15/centered_abs_mean": 0.13970998972654342, "signal/frontier_coverage_15/group_bin_occupancy": 0.8805555555555558, "signal/frontier_coverage_15/group_std_mean": 0.18841452598571778, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0017463748808950186, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0017463748808950186, "signal/frontier_coverage_20/centered_abs_mean": 0.13970998972654342, "signal/frontier_coverage_20/group_bin_occupancy": 0.8805555555555558, "signal/frontier_coverage_20/group_std_mean": 0.18841452598571778, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0017463748808950186, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0017463748808950186, "signal/frontier_coverage_25/centered_abs_mean": 0.13970998972654342, "signal/frontier_coverage_25/group_bin_occupancy": 0.8805555555555558, "signal/frontier_coverage_25/group_std_mean": 0.18841452598571778, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0017463748808950186, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017463748808950186, "signal/frontier_coverage_5/centered_abs_mean": 0.13970998972654342, "signal/frontier_coverage_5/group_bin_occupancy": 0.8805555555555558, "signal/frontier_coverage_5/group_std_mean": 0.18841452598571778, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0017463748808950186, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0017463748808950186, "signal/frontier_ece_reward/centered_abs_mean": 0.03632725402712822, "signal/frontier_ece_reward/group_bin_occupancy": 0.617361111111111, "signal/frontier_ece_reward/group_std_mean": 0.054545311629772185, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0036327255424112082, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0036327255424112082, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.42244229912757875, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7017361111111111, "signal/frontier_entropy_batch_reward/group_std_mean": 0.49159557223320005, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.04224423244595528, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.04224423244595528, "step": 40 }, { "calibration/aurc": 0.2206993313754026, "calibration/batch_distribution_entropy": 0.9724187710530586, "calibration/batch_entropy_100bins": 0.9570917986236539, "calibration/batch_entropy_10bins": 0.9724187710530586, "calibration/batch_entropy_50bins": 0.9691055641857181, "calibration/batch_uniqueness": 0.9515475678236415, "calibration/buffer_distribution_entropy": 0.7119279342121769, "calibration/buffer_entropy_100bins": 0.6250743755525399, "calibration/buffer_entropy_10bins": 0.7119279342121769, "calibration/buffer_entropy_50bins": 0.6950079249559582, "calibration/confidence_entropy": 0.5458137036233696, "calibration/coverage@0%": 0.020305716058574348, "calibration/coverage@1%": 0.020305716058574348, "calibration/coverage@10%": 0.057875053452992965, "calibration/coverage@15%": 0.08067257367669875, "calibration/coverage@20%": 0.3934809427991608, "calibration/coverage@25%": 0.8858671787769413, "calibration/coverage@30%": 0.9934959349593496, "calibration/coverage@5%": 0.025541318152815184, "calibration/ece": 0.24761169294312846, "calibration/mean_confidence": 0.506569974973911, "calibration/prompt_uniqueness": 0.8903720034084112, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021180555555555557, "completions/max_length": 3845.0, "completions/max_terminated_length": 3845.0, "completions/mean_length": 723.7879516601563, "completions/mean_terminated_length": 739.548291015625, "completions/min_length": 0.0, "completions/min_terminated_length": 221.6, "epoch": 0.1079986500168748, "grad_norm": 0.0005128175835125148, "learning_rate": 4.909638554216868e-06, "loss": -0.0162, "num_tokens": 85721510.0, "reward": 0.9443981885910034, "reward_std": 0.16389898359775543, "rewards/accuracy_reward": 0.6543402791023254, "rewards/brier_reward": 0.6937033653259277, "rewards/confidence_uniqueness_reward": 0.9300097227096558, "rewards/format_reward": 0.978038203716278, "rewards/frontier_aurc_reward": -0.002327358117327094, "rewards/frontier_coverage_0": -0.0785758774727583, "rewards/frontier_coverage_1": -0.0785758774727583, "rewards/frontier_coverage_10": -0.0785758774727583, "rewards/frontier_coverage_15": -0.0785758774727583, "rewards/frontier_coverage_20": -0.0785758774727583, "rewards/frontier_coverage_25": -0.0785758774727583, "rewards/frontier_coverage_5": -0.0785758774727583, "rewards/frontier_ece_reward": -0.0007223693886771798, "rewards/frontier_entropy_batch_reward": -0.27185641825199125, "signal/accuracy_reward/centered_abs_mean": 0.19038628339767455, "signal/accuracy_reward/group_bin_occupancy": 0.21388888888888888, "signal/accuracy_reward/group_std_mean": 0.2518360376358032, "signal/accuracy_reward/group_zero_std_frac": 0.28888889253139494, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09519314169883727, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09519314169883727, "signal/advantage_abs_mean": 0.12383081167936325, "signal/advantage_pre_scale_abs_mean": 0.12383081167936325, "signal/advantage_pre_scale_std": 0.1818700224161148, "signal/advantage_std": 0.1818700224161148, "signal/brier_reward/centered_abs_mean": 0.2113836646080017, "signal/brier_reward/group_bin_occupancy": 0.9274305555555555, "signal/brier_reward/group_std_mean": 0.2588895708322525, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02113836631178856, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.02113836631178856, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.045432856678962706, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8055555555555556, "signal/confidence_uniqueness_reward/group_std_mean": 0.07177356258034706, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004543285816907883, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004543285816907883, "signal/format_reward/centered_abs_mean": 0.03470594622194767, "signal/format_reward/group_bin_occupancy": 0.15312499999999998, "signal/format_reward/group_std_mean": 0.05929795354604721, "signal/format_reward/group_zero_std_frac": 0.7750000119209289, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.017352973110973835, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.017352973110973835, "signal/frontier_aurc_reward/centered_abs_mean": 0.0014846524223685264, "signal/frontier_aurc_reward/group_bin_occupancy": 0.673611111111111, "signal/frontier_aurc_reward/group_std_mean": 0.0024840928614139556, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.855815662565874e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.855815662565874e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.24903229475021363, "signal/frontier_coverage_0/group_bin_occupancy": 0.9211805555555556, "signal/frontier_coverage_0/group_std_mean": 0.32121285796165466, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003112903609871864, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003112903609871864, "signal/frontier_coverage_1/centered_abs_mean": 0.24903229475021363, "signal/frontier_coverage_1/group_bin_occupancy": 0.9211805555555556, "signal/frontier_coverage_1/group_std_mean": 0.32121285796165466, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003112903609871864, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003112903609871864, "signal/frontier_coverage_10/centered_abs_mean": 0.24903229475021363, "signal/frontier_coverage_10/group_bin_occupancy": 0.9211805555555556, "signal/frontier_coverage_10/group_std_mean": 0.32121285796165466, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003112903609871864, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003112903609871864, "signal/frontier_coverage_15/centered_abs_mean": 0.24903229475021363, "signal/frontier_coverage_15/group_bin_occupancy": 0.9211805555555556, "signal/frontier_coverage_15/group_std_mean": 0.32121285796165466, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003112903609871864, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003112903609871864, "signal/frontier_coverage_20/centered_abs_mean": 0.24903229475021363, "signal/frontier_coverage_20/group_bin_occupancy": 0.9211805555555556, "signal/frontier_coverage_20/group_std_mean": 0.32121285796165466, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003112903609871864, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003112903609871864, "signal/frontier_coverage_25/centered_abs_mean": 0.24903229475021363, "signal/frontier_coverage_25/group_bin_occupancy": 0.9211805555555556, "signal/frontier_coverage_25/group_std_mean": 0.32121285796165466, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003112903609871864, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003112903609871864, "signal/frontier_coverage_5/centered_abs_mean": 0.24903229475021363, "signal/frontier_coverage_5/group_bin_occupancy": 0.9211805555555556, "signal/frontier_coverage_5/group_std_mean": 0.32121285796165466, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003112903609871864, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003112903609871864, "signal/frontier_ece_reward/centered_abs_mean": 0.07297626733779908, "signal/frontier_ece_reward/group_bin_occupancy": 0.804513888888889, "signal/frontier_ece_reward/group_std_mean": 0.09936150461435318, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0072976269759237765, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0072976269759237765, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.35308589935302737, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7815972222222223, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4245911598205566, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03530859164893627, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03530859164893627, "step": 45 }, { "calibration/aurc": 0.43490251490767023, "calibration/batch_distribution_entropy": 0.9529251572760946, "calibration/batch_entropy_100bins": 0.94042765491534, "calibration/batch_entropy_10bins": 0.9529251572760946, "calibration/batch_entropy_50bins": 0.952941110179381, "calibration/batch_uniqueness": 0.9468997638508156, "calibration/buffer_distribution_entropy": 0.772094451286123, "calibration/buffer_entropy_100bins": 0.6983399485850001, "calibration/buffer_entropy_10bins": 0.772094451286123, "calibration/buffer_entropy_50bins": 0.7590855500965974, "calibration/confidence_entropy": 0.4826112831276858, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.003825136612021858, "calibration/coverage@20%": 0.01294977313373133, "calibration/coverage@25%": 0.02212548072935199, "calibration/coverage@30%": 0.032500629861073965, "calibration/coverage@5%": 0.0, "calibration/ece": 0.23091974604376944, "calibration/mean_confidence": 0.6206757591253088, "calibration/prompt_uniqueness": 0.8837748821431823, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01909722222222223, "completions/max_length": 3686.6, "completions/max_terminated_length": 3686.6, "completions/mean_length": 757.1386474609375, "completions/mean_terminated_length": 771.9427124023438, "completions/min_length": 0.0, "completions/min_terminated_length": 215.2, "epoch": 0.11999850001874976, "grad_norm": 0.0005383854149840772, "learning_rate": 4.759036144578314e-06, "loss": -0.016, "num_tokens": 97541347.0, "reward": 0.942762804031372, "reward_std": 0.16731804311275483, "rewards/accuracy_reward": 0.6445312619209289, "rewards/brier_reward": 0.7177430987358093, "rewards/confidence_uniqueness_reward": 0.9267112493515015, "rewards/format_reward": 0.9802083373069763, "rewards/frontier_aurc_reward": -0.0026557988487184046, "rewards/frontier_coverage_0": -0.034297770075500014, "rewards/frontier_coverage_1": -0.034297770075500014, "rewards/frontier_coverage_10": -0.034297770075500014, "rewards/frontier_coverage_15": -0.034297770075500014, "rewards/frontier_coverage_20": -0.034297770075500014, "rewards/frontier_coverage_25": -0.034297770075500014, "rewards/frontier_coverage_5": -0.034297770075500014, "rewards/frontier_ece_reward": 0.018495285883545876, "rewards/frontier_entropy_batch_reward": -0.3286770522594452, "signal/accuracy_reward/centered_abs_mean": 0.18459743857383729, "signal/accuracy_reward/group_bin_occupancy": 0.20972222222222223, "signal/accuracy_reward/group_std_mean": 0.24165296256542207, "signal/accuracy_reward/group_zero_std_frac": 0.32222222685813906, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09229871928691864, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09229871928691864, "signal/advantage_abs_mean": 0.12547171711921692, "signal/advantage_pre_scale_abs_mean": 0.12547171711921692, "signal/advantage_pre_scale_std": 0.1906561881303787, "signal/advantage_std": 0.1906561881303787, "signal/brier_reward/centered_abs_mean": 0.21231429576873778, "signal/brier_reward/group_bin_occupancy": 0.8871527777777779, "signal/brier_reward/group_std_mean": 0.26225546598434446, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021231430768966674, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.021231430768966674, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.046096354722976685, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.7819444444444444, "signal/confidence_uniqueness_reward/group_std_mean": 0.07447160631418229, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004609635565429926, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004609635565429926, "signal/format_reward/centered_abs_mean": 0.03169487938284874, "signal/format_reward/group_bin_occupancy": 0.15381944444444443, "signal/format_reward/group_std_mean": 0.05741401687264443, "signal/format_reward/group_zero_std_frac": 0.769444465637207, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01584743969142437, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01584743969142437, "signal/frontier_aurc_reward/centered_abs_mean": 0.002861758507788181, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6586805555555555, "signal/frontier_aurc_reward/group_std_mean": 0.00452820798382163, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.5771980765275654e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.5771980765275654e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.19227492213249206, "signal/frontier_coverage_0/group_bin_occupancy": 0.865625, "signal/frontier_coverage_0/group_std_mean": 0.2632899612188339, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002403436554595828, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002403436554595828, "signal/frontier_coverage_1/centered_abs_mean": 0.19227492213249206, "signal/frontier_coverage_1/group_bin_occupancy": 0.865625, "signal/frontier_coverage_1/group_std_mean": 0.2632899612188339, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002403436554595828, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002403436554595828, "signal/frontier_coverage_10/centered_abs_mean": 0.19227492213249206, "signal/frontier_coverage_10/group_bin_occupancy": 0.865625, "signal/frontier_coverage_10/group_std_mean": 0.2632899612188339, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002403436554595828, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002403436554595828, "signal/frontier_coverage_15/centered_abs_mean": 0.19227492213249206, "signal/frontier_coverage_15/group_bin_occupancy": 0.865625, "signal/frontier_coverage_15/group_std_mean": 0.2632899612188339, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002403436554595828, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002403436554595828, "signal/frontier_coverage_20/centered_abs_mean": 0.19227492213249206, "signal/frontier_coverage_20/group_bin_occupancy": 0.865625, "signal/frontier_coverage_20/group_std_mean": 0.2632899612188339, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002403436554595828, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002403436554595828, "signal/frontier_coverage_25/centered_abs_mean": 0.19227492213249206, "signal/frontier_coverage_25/group_bin_occupancy": 0.865625, "signal/frontier_coverage_25/group_std_mean": 0.2632899612188339, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002403436554595828, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002403436554595828, "signal/frontier_coverage_5/centered_abs_mean": 0.19227492213249206, "signal/frontier_coverage_5/group_bin_occupancy": 0.865625, "signal/frontier_coverage_5/group_std_mean": 0.2632899612188339, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002403436554595828, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002403436554595828, "signal/frontier_ece_reward/centered_abs_mean": 0.08109527826309204, "signal/frontier_ece_reward/group_bin_occupancy": 0.8055555555555556, "signal/frontier_ece_reward/group_std_mean": 0.1046181559562683, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00810952829197049, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00810952829197049, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.389397132396698, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7958333333333334, "signal/frontier_entropy_batch_reward/group_std_mean": 0.45458305478096006, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03893971517682075, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03893971517682075, "step": 50 }, { "epoch": 0.11999850001874976, "eval_calibration/aurc": 0.2935445625829087, "eval_calibration/batch_distribution_entropy": 0.8770594734217364, "eval_calibration/batch_entropy_100bins": 0.6997554410803705, "eval_calibration/batch_entropy_10bins": 0.8770594734217364, "eval_calibration/batch_entropy_50bins": 0.7724054270082138, "eval_calibration/batch_uniqueness": 0.8926113206729102, "eval_calibration/buffer_distribution_entropy": 0.7915374005307516, "eval_calibration/buffer_entropy_100bins": 0.7306963981499348, "eval_calibration/buffer_entropy_10bins": 0.7915374005307516, "eval_calibration/buffer_entropy_50bins": 0.7851080575149143, "eval_calibration/confidence_entropy": 0.4853972869479209, "eval_calibration/coverage@0%": 0.09122983870967742, "eval_calibration/coverage@1%": 0.09122983870967742, "eval_calibration/coverage@10%": 0.15036962365591397, "eval_calibration/coverage@15%": 0.3385416666666667, "eval_calibration/coverage@20%": 0.3923051075268817, "eval_calibration/coverage@25%": 0.6832997311827956, "eval_calibration/coverage@30%": 0.7689852150537635, "eval_calibration/coverage@5%": 0.09122983870967742, "eval_calibration/ece": 0.2806067776772616, "eval_calibration/mean_confidence": 0.6447706628426046, "eval_calibration/prompt_uniqueness": 0.8926113206729102, "eval_completions/clipped_ratio": 0.028472222222222215, "eval_completions/max_length": 2254.5, "eval_completions/max_terminated_length": 2254.5, "eval_completions/mean_length": 760.700185139974, "eval_completions/mean_terminated_length": 782.921864827474, "eval_completions/min_length": 0.0, "eval_completions/min_terminated_length": 306.1666666666667, "eval_loss": 0.0, "eval_num_tokens": 97541347.0, "eval_reward": 0.8601760963598887, "eval_reward_std": 0.2734912733236949, "eval_rewards/accuracy_reward": 0.6284722288449606, "eval_rewards/brier_reward": 0.7127746840318044, "eval_rewards/confidence_uniqueness_reward": 0.8644578456878662, "eval_rewards/format_reward": 0.972222218910853, "eval_rewards/frontier_aurc_reward": -0.0026267794310115278, "eval_rewards/frontier_coverage_0": -0.027196575111399095, "eval_rewards/frontier_coverage_1": -0.027196575111399095, "eval_rewards/frontier_coverage_10": -0.027196575111399095, "eval_rewards/frontier_coverage_15": -0.027196575111399095, "eval_rewards/frontier_coverage_20": -0.027196575111399095, "eval_rewards/frontier_coverage_25": -0.027196575111399095, "eval_rewards/frontier_coverage_5": -0.027196575111399095, "eval_rewards/frontier_ece_reward": 0.01740353476877014, "eval_rewards/frontier_entropy_batch_reward": -0.972222218910853, "eval_runtime": 211.1601, "eval_samples_per_second": 4.736, "eval_signal/accuracy_reward/centered_abs_mean": 0.4504123230775197, "eval_signal/accuracy_reward/group_bin_occupancy": 0.25, "eval_signal/accuracy_reward/group_std_mean": 0.48123881717522937, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.22520616153875986, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.22520616153875986, "eval_signal/advantage_abs_mean": 0.238955890138944, "eval_signal/advantage_pre_scale_abs_mean": 0.238955890138944, "eval_signal/advantage_pre_scale_std": 0.27245956162611645, "eval_signal/advantage_std": 0.27245956162611645, "eval_signal/brier_reward/centered_abs_mean": 0.24741176019112268, "eval_signal/brier_reward/group_bin_occupancy": 0.9027777777777778, "eval_signal/brier_reward/group_std_mean": 0.30227703352769214, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.024741175894935925, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.024741175894935925, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.07496882602572441, "eval_signal/confidence_uniqueness_reward/group_bin_occupancy": 0.40625, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.13260164111852646, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007496882385263841, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007496882385263841, "eval_signal/format_reward/centered_abs_mean": 0.051974826492369175, "eval_signal/format_reward/group_bin_occupancy": 0.19444444444444442, "eval_signal/format_reward/group_std_mean": 0.11886927050848801, "eval_signal/format_reward/group_zero_std_frac": 0.4444444502393405, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.025987413246184587, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.025987413246184587, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0029930932990585766, "eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.7048611111111112, "eval_signal/frontier_aurc_reward/group_std_mean": 0.005078944067160289, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.741366587443432e-05, "eval_signal/frontier_aurc_reward/weight": 0.012500000186264515, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.741366587443432e-05, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.2153101439277331, "eval_signal/frontier_coverage_0/group_bin_occupancy": 0.8958333333333334, "eval_signal/frontier_coverage_0/group_std_mean": 0.32011035084724426, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0026913767602915564, "eval_signal/frontier_coverage_0/weight": 0.012500000186264515, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0026913767602915564, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.2153101439277331, "eval_signal/frontier_coverage_1/group_bin_occupancy": 0.8958333333333334, "eval_signal/frontier_coverage_1/group_std_mean": 0.32011035084724426, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026913767602915564, "eval_signal/frontier_coverage_1/weight": 0.012500000186264515, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026913767602915564, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.2153101439277331, "eval_signal/frontier_coverage_10/group_bin_occupancy": 0.8958333333333334, "eval_signal/frontier_coverage_10/group_std_mean": 0.32011035084724426, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026913767602915564, "eval_signal/frontier_coverage_10/weight": 0.012500000186264515, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026913767602915564, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.2153101439277331, "eval_signal/frontier_coverage_15/group_bin_occupancy": 0.8958333333333334, "eval_signal/frontier_coverage_15/group_std_mean": 0.32011035084724426, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0026913767602915564, "eval_signal/frontier_coverage_15/weight": 0.012500000186264515, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026913767602915564, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.2153101439277331, "eval_signal/frontier_coverage_20/group_bin_occupancy": 0.8958333333333334, "eval_signal/frontier_coverage_20/group_std_mean": 0.32011035084724426, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0026913767602915564, "eval_signal/frontier_coverage_20/weight": 0.012500000186264515, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0026913767602915564, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.2153101439277331, "eval_signal/frontier_coverage_25/group_bin_occupancy": 0.8958333333333334, "eval_signal/frontier_coverage_25/group_std_mean": 0.32011035084724426, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0026913767602915564, "eval_signal/frontier_coverage_25/weight": 0.012500000186264515, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0026913767602915564, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.2153101439277331, "eval_signal/frontier_coverage_5/group_bin_occupancy": 0.8958333333333334, "eval_signal/frontier_coverage_5/group_std_mean": 0.32011035084724426, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026913767602915564, "eval_signal/frontier_coverage_5/weight": 0.012500000186264515, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026913767602915564, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.07921455428004265, "eval_signal/frontier_ece_reward/group_bin_occupancy": 0.8854166666666666, "eval_signal/frontier_ece_reward/group_std_mean": 0.10567496220270793, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.007921455971275767, "eval_signal/frontier_ece_reward/weight": 0.10000000149011612, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.007921455971275767, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.051974826492369175, "eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.19444444444444442, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.11886927050848801, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.4444444502393405, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.00519748261043181, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.00519748261043181, "eval_steps_per_second": 0.028, "step": 50 }, { "calibration/aurc": 0.29101160035536516, "calibration/batch_distribution_entropy": 0.9611125544862056, "calibration/batch_entropy_100bins": 0.9450097531606187, "calibration/batch_entropy_10bins": 0.9611125544862056, "calibration/batch_entropy_50bins": 0.9579487928522715, "calibration/batch_uniqueness": 0.9479291763813198, "calibration/buffer_distribution_entropy": 0.802567505648549, "calibration/buffer_entropy_100bins": 0.7485174194719845, "calibration/buffer_entropy_10bins": 0.802567505648549, "calibration/buffer_entropy_50bins": 0.7990773865767944, "calibration/confidence_entropy": 0.4802339646904608, "calibration/coverage@0%": 0.005950499478157149, "calibration/coverage@1%": 0.005950499478157149, "calibration/coverage@10%": 0.05489786789920977, "calibration/coverage@15%": 0.08344920618123151, "calibration/coverage@20%": 0.38568360233253723, "calibration/coverage@25%": 0.5033324927722896, "calibration/coverage@30%": 0.5689476556018148, "calibration/coverage@5%": 0.005950499478157149, "calibration/ece": 0.19851863392301644, "calibration/mean_confidence": 0.6001296239600306, "calibration/prompt_uniqueness": 0.8830341006380135, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.020225694444444442, "completions/max_length": 3700.8, "completions/max_terminated_length": 3700.8, "completions/mean_length": 826.671875, "completions/mean_terminated_length": 844.00693359375, "completions/min_length": 0.0, "completions/min_terminated_length": 215.4, "epoch": 0.13199835002062474, "grad_norm": 0.0003322226111777127, "learning_rate": 4.60843373493976e-06, "loss": -0.0163, "num_tokens": 110145183.0, "reward": 0.9478794574737549, "reward_std": 0.1643240600824356, "rewards/accuracy_reward": 0.6479166626930237, "rewards/brier_reward": 0.7111706972122193, "rewards/confidence_uniqueness_reward": 0.9300649881362915, "rewards/format_reward": 0.978993046283722, "rewards/frontier_aurc_reward": -0.00247355445753783, "rewards/frontier_coverage_0": -0.04043981209397316, "rewards/frontier_coverage_1": -0.04043981209397316, "rewards/frontier_coverage_10": -0.04043981209397316, "rewards/frontier_coverage_15": -0.04043981209397316, "rewards/frontier_coverage_20": -0.04043981209397316, "rewards/frontier_coverage_25": -0.04043981209397316, "rewards/frontier_coverage_5": -0.04043981209397316, "rewards/frontier_ece_reward": 0.013695706240832805, "rewards/frontier_entropy_batch_reward": -0.27499137818813324, "signal/accuracy_reward/centered_abs_mean": 0.1818793386220932, "signal/accuracy_reward/group_bin_occupancy": 0.20902777777777776, "signal/accuracy_reward/group_std_mean": 0.2380914032459259, "signal/accuracy_reward/group_zero_std_frac": 0.3277777791023254, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0909396693110466, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0909396693110466, "signal/advantage_abs_mean": 0.12402228713035583, "signal/advantage_pre_scale_abs_mean": 0.12402228713035583, "signal/advantage_pre_scale_std": 0.18747871220111847, "signal/advantage_std": 0.18747871220111847, "signal/brier_reward/centered_abs_mean": 0.2243650496006012, "signal/brier_reward/group_bin_occupancy": 0.8930555555555555, "signal/brier_reward/group_std_mean": 0.27434876561164856, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.022436505928635598, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.022436505928635598, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.044763144105672836, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.7885416666666666, "signal/confidence_uniqueness_reward/group_std_mean": 0.0715868502855301, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004476314364001155, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004476314364001155, "signal/format_reward/centered_abs_mean": 0.03343098945915699, "signal/format_reward/group_bin_occupancy": 0.15243055555555557, "signal/format_reward/group_std_mean": 0.05805426985025406, "signal/format_reward/group_zero_std_frac": 0.7805555701255799, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.016715494729578496, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.016715494729578496, "signal/frontier_aurc_reward/centered_abs_mean": 0.0024474710691720246, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6569444444444444, "signal/frontier_aurc_reward/group_std_mean": 0.003879967099055648, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.0593389601563105e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.0593389601563105e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.2272661030292511, "signal/frontier_coverage_0/group_bin_occupancy": 0.8708333333333333, "signal/frontier_coverage_0/group_std_mean": 0.30359464287757876, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002840826474130154, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002840826474130154, "signal/frontier_coverage_1/centered_abs_mean": 0.2272661030292511, "signal/frontier_coverage_1/group_bin_occupancy": 0.8708333333333333, "signal/frontier_coverage_1/group_std_mean": 0.30359464287757876, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002840826474130154, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002840826474130154, "signal/frontier_coverage_10/centered_abs_mean": 0.2272661030292511, "signal/frontier_coverage_10/group_bin_occupancy": 0.8708333333333333, "signal/frontier_coverage_10/group_std_mean": 0.30359464287757876, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002840826474130154, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002840826474130154, "signal/frontier_coverage_15/centered_abs_mean": 0.2272661030292511, "signal/frontier_coverage_15/group_bin_occupancy": 0.8708333333333333, "signal/frontier_coverage_15/group_std_mean": 0.30359464287757876, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002840826474130154, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002840826474130154, "signal/frontier_coverage_20/centered_abs_mean": 0.2272661030292511, "signal/frontier_coverage_20/group_bin_occupancy": 0.8708333333333333, "signal/frontier_coverage_20/group_std_mean": 0.30359464287757876, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002840826474130154, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002840826474130154, "signal/frontier_coverage_25/centered_abs_mean": 0.2272661030292511, "signal/frontier_coverage_25/group_bin_occupancy": 0.8708333333333333, "signal/frontier_coverage_25/group_std_mean": 0.30359464287757876, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002840826474130154, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002840826474130154, "signal/frontier_coverage_5/centered_abs_mean": 0.2272661030292511, "signal/frontier_coverage_5/group_bin_occupancy": 0.8708333333333333, "signal/frontier_coverage_5/group_std_mean": 0.30359464287757876, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002840826474130154, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002840826474130154, "signal/frontier_ece_reward/centered_abs_mean": 0.08030216246843339, "signal/frontier_ece_reward/group_bin_occupancy": 0.8336805555555555, "signal/frontier_ece_reward/group_std_mean": 0.10320238173007965, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00803021676838398, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00803021676838398, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3581114888191223, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7989583333333334, "signal/frontier_entropy_batch_reward/group_std_mean": 0.42964831590652464, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.035811149328947064, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.035811149328947064, "step": 55 }, { "calibration/aurc": 0.34571040020458177, "calibration/batch_distribution_entropy": 0.9622518418951735, "calibration/batch_entropy_100bins": 0.9519820743610523, "calibration/batch_entropy_10bins": 0.9622518418951735, "calibration/batch_entropy_50bins": 0.9639614098520457, "calibration/batch_uniqueness": 0.9497552026076601, "calibration/buffer_distribution_entropy": 0.8279265448356025, "calibration/buffer_entropy_100bins": 0.7843073089436804, "calibration/buffer_entropy_10bins": 0.8279265448356025, "calibration/buffer_entropy_50bins": 0.8281794662889326, "calibration/confidence_entropy": 0.517672969074423, "calibration/coverage@0%": 0.005273351823406105, "calibration/coverage@1%": 0.005273351823406105, "calibration/coverage@10%": 0.04925240941502914, "calibration/coverage@15%": 0.25197225967087444, "calibration/coverage@20%": 0.2773683429293877, "calibration/coverage@25%": 0.41644513658785354, "calibration/coverage@30%": 0.43501219130263385, "calibration/coverage@5%": 0.005273351823406105, "calibration/ece": 0.2220912439346801, "calibration/mean_confidence": 0.5937931918125188, "calibration/prompt_uniqueness": 0.88741231383509, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.019791666666666652, "completions/max_length": 3753.4, "completions/max_terminated_length": 3753.4, "completions/mean_length": 890.4375854492188, "completions/mean_terminated_length": 908.44677734375, "completions/min_length": 0.0, "completions/min_terminated_length": 269.2, "epoch": 0.14399820002249972, "grad_norm": 0.000333575124386698, "learning_rate": 4.457831325301205e-06, "loss": -0.0167, "num_tokens": 123499600.0, "reward": 0.9414217710494995, "reward_std": 0.16816579103469848, "rewards/accuracy_reward": 0.6327257037162781, "rewards/brier_reward": 0.7177812337875367, "rewards/confidence_uniqueness_reward": 0.9303562760353088, "rewards/format_reward": 0.9795138835906982, "rewards/frontier_aurc_reward": -0.002422581100836396, "rewards/frontier_coverage_0": -0.03289339188486338, "rewards/frontier_coverage_1": -0.03289339188486338, "rewards/frontier_coverage_10": -0.03289339188486338, "rewards/frontier_coverage_15": -0.03289339188486338, "rewards/frontier_coverage_20": -0.03289339188486338, "rewards/frontier_coverage_25": -0.03289339188486338, "rewards/frontier_coverage_5": -0.03289339188486338, "rewards/frontier_ece_reward": 0.011117698205634952, "rewards/frontier_entropy_batch_reward": -0.27715103328227997, "signal/accuracy_reward/centered_abs_mean": 0.1928331136703491, "signal/accuracy_reward/group_bin_occupancy": 0.21284722222222224, "signal/accuracy_reward/group_std_mean": 0.25233509540557864, "signal/accuracy_reward/group_zero_std_frac": 0.2972222208976746, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09641655683517455, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09641655683517455, "signal/advantage_abs_mean": 0.12606564462184905, "signal/advantage_pre_scale_abs_mean": 0.12606564462184905, "signal/advantage_pre_scale_std": 0.19068869948387146, "signal/advantage_std": 0.19068869948387146, "signal/brier_reward/centered_abs_mean": 0.20728689432144165, "signal/brier_reward/group_bin_occupancy": 0.9072916666666666, "signal/brier_reward/group_std_mean": 0.2543580114841461, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020728689804673194, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.020728689804673194, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.043829741328954695, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.7784722222222222, "signal/confidence_uniqueness_reward/group_std_mean": 0.07412301301956177, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004382974375039339, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004382974375039339, "signal/format_reward/centered_abs_mean": 0.03404947929084301, "signal/format_reward/group_bin_occupancy": 0.15694444444444444, "signal/format_reward/group_std_mean": 0.06289056539535523, "signal/format_reward/group_zero_std_frac": 0.7444444417953491, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.017024739645421505, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.017024739645421505, "signal/frontier_aurc_reward/centered_abs_mean": 0.0021621018648147585, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6690972222222222, "signal/frontier_aurc_reward/group_std_mean": 0.0034721433650702236, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.702627461985685e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.702627461985685e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.20764632821083068, "signal/frontier_coverage_0/group_bin_occupancy": 0.88125, "signal/frontier_coverage_0/group_std_mean": 0.2768110573291779, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002595579205080867, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002595579205080867, "signal/frontier_coverage_1/centered_abs_mean": 0.20764632821083068, "signal/frontier_coverage_1/group_bin_occupancy": 0.88125, "signal/frontier_coverage_1/group_std_mean": 0.2768110573291779, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002595579205080867, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002595579205080867, "signal/frontier_coverage_10/centered_abs_mean": 0.20764632821083068, "signal/frontier_coverage_10/group_bin_occupancy": 0.88125, "signal/frontier_coverage_10/group_std_mean": 0.2768110573291779, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002595579205080867, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002595579205080867, "signal/frontier_coverage_15/centered_abs_mean": 0.20764632821083068, "signal/frontier_coverage_15/group_bin_occupancy": 0.88125, "signal/frontier_coverage_15/group_std_mean": 0.2768110573291779, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002595579205080867, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002595579205080867, "signal/frontier_coverage_20/centered_abs_mean": 0.20764632821083068, "signal/frontier_coverage_20/group_bin_occupancy": 0.88125, "signal/frontier_coverage_20/group_std_mean": 0.2768110573291779, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002595579205080867, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002595579205080867, "signal/frontier_coverage_25/centered_abs_mean": 0.20764632821083068, "signal/frontier_coverage_25/group_bin_occupancy": 0.88125, "signal/frontier_coverage_25/group_std_mean": 0.2768110573291779, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002595579205080867, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002595579205080867, "signal/frontier_coverage_5/centered_abs_mean": 0.20764632821083068, "signal/frontier_coverage_5/group_bin_occupancy": 0.88125, "signal/frontier_coverage_5/group_std_mean": 0.2768110573291779, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002595579205080867, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002595579205080867, "signal/frontier_ece_reward/centered_abs_mean": 0.069983871281147, "signal/frontier_ece_reward/group_bin_occupancy": 0.8329861111111111, "signal/frontier_ece_reward/group_std_mean": 0.09274458438158036, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006998386885970831, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006998386885970831, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3481548845767975, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7982638888888889, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4152218818664551, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.034815489500761035, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.034815489500761035, "step": 60 }, { "calibration/aurc": 0.2649692233853358, "calibration/batch_distribution_entropy": 0.9561995071075307, "calibration/batch_entropy_100bins": 0.948571532521358, "calibration/batch_entropy_10bins": 0.9561995071075307, "calibration/batch_entropy_50bins": 0.9586481201565193, "calibration/batch_uniqueness": 0.9481666389414347, "calibration/buffer_distribution_entropy": 0.8478863193017032, "calibration/buffer_entropy_100bins": 0.8120021970066679, "calibration/buffer_entropy_10bins": 0.8478863193017032, "calibration/buffer_entropy_50bins": 0.85018672179855, "calibration/confidence_entropy": 0.5380890006629526, "calibration/coverage@0%": 0.01272840150692083, "calibration/coverage@1%": 0.01272840150692083, "calibration/coverage@10%": 0.08449456058933413, "calibration/coverage@15%": 0.17000585864866288, "calibration/coverage@20%": 0.49943408426598157, "calibration/coverage@25%": 0.6136174853877333, "calibration/coverage@30%": 0.6983957219251338, "calibration/coverage@5%": 0.015894628419849587, "calibration/ece": 0.1826561762527292, "calibration/mean_confidence": 0.5725721913547105, "calibration/prompt_uniqueness": 0.8862852361567064, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017534722222222233, "completions/max_length": 3875.2, "completions/max_terminated_length": 3875.2, "completions/mean_length": 922.3502563476562, "completions/mean_terminated_length": 938.8082885742188, "completions/min_length": 0.0, "completions/min_terminated_length": 259.6, "epoch": 0.1559980500243747, "grad_norm": 0.0003632537554949522, "learning_rate": 4.307228915662651e-06, "loss": -0.0139, "num_tokens": 137219123.0, "reward": 0.9586760997772217, "reward_std": 0.15382943153381348, "rewards/accuracy_reward": 0.6605902791023255, "rewards/brier_reward": 0.7354176759719848, "rewards/confidence_uniqueness_reward": 0.9327790856361389, "rewards/format_reward": 0.9817708253860473, "rewards/frontier_aurc_reward": -0.002018653857521713, "rewards/frontier_coverage_0": -0.03963281610049307, "rewards/frontier_coverage_1": -0.03963281610049307, "rewards/frontier_coverage_10": -0.03963281610049307, "rewards/frontier_coverage_15": -0.03963281610049307, "rewards/frontier_coverage_20": -0.03963281610049307, "rewards/frontier_coverage_25": -0.03963281610049307, "rewards/frontier_coverage_5": -0.03963281610049307, "rewards/frontier_ece_reward": 0.010902080871164798, "rewards/frontier_entropy_batch_reward": -0.26921272873878477, "signal/accuracy_reward/centered_abs_mean": 0.16614583134651184, "signal/accuracy_reward/group_bin_occupancy": 0.20694444444444446, "signal/accuracy_reward/group_std_mean": 0.2237447142601013, "signal/accuracy_reward/group_zero_std_frac": 0.3444444477558136, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08307291567325592, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08307291567325592, "signal/advantage_abs_mean": 0.11447918117046356, "signal/advantage_pre_scale_abs_mean": 0.11447918117046356, "signal/advantage_pre_scale_std": 0.17718282341957092, "signal/advantage_std": 0.17718282341957092, "signal/brier_reward/centered_abs_mean": 0.1887804687023163, "signal/brier_reward/group_bin_occupancy": 0.9003472222222222, "signal/brier_reward/group_std_mean": 0.23589398562908173, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018878047168254853, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.018878047168254853, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03989965319633484, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.7913194444444445, "signal/confidence_uniqueness_reward/group_std_mean": 0.06814380064606666, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003989965561777354, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003989965561777354, "signal/format_reward/centered_abs_mean": 0.02988281212747097, "signal/format_reward/group_bin_occupancy": 0.1545138888888889, "signal/format_reward/group_std_mean": 0.05654868856072426, "signal/format_reward/group_zero_std_frac": 0.7638888955116272, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.014941406063735485, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.014941406063735485, "signal/frontier_aurc_reward/centered_abs_mean": 0.001725417748093605, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6888888888888889, "signal/frontier_aurc_reward/group_std_mean": 0.0028228630777448415, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.156772206944879e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.156772206944879e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.20168729424476622, "signal/frontier_coverage_0/group_bin_occupancy": 0.8833333333333332, "signal/frontier_coverage_0/group_std_mean": 0.26654154658317564, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002521091140806675, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002521091140806675, "signal/frontier_coverage_1/centered_abs_mean": 0.20168729424476622, "signal/frontier_coverage_1/group_bin_occupancy": 0.8833333333333332, "signal/frontier_coverage_1/group_std_mean": 0.26654154658317564, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002521091140806675, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002521091140806675, "signal/frontier_coverage_10/centered_abs_mean": 0.20168729424476622, "signal/frontier_coverage_10/group_bin_occupancy": 0.8833333333333332, "signal/frontier_coverage_10/group_std_mean": 0.26654154658317564, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002521091140806675, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002521091140806675, "signal/frontier_coverage_15/centered_abs_mean": 0.20168729424476622, "signal/frontier_coverage_15/group_bin_occupancy": 0.8833333333333332, "signal/frontier_coverage_15/group_std_mean": 0.26654154658317564, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002521091140806675, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002521091140806675, "signal/frontier_coverage_20/centered_abs_mean": 0.20168729424476622, "signal/frontier_coverage_20/group_bin_occupancy": 0.8833333333333332, "signal/frontier_coverage_20/group_std_mean": 0.26654154658317564, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002521091140806675, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002521091140806675, "signal/frontier_coverage_25/centered_abs_mean": 0.20168729424476622, "signal/frontier_coverage_25/group_bin_occupancy": 0.8833333333333332, "signal/frontier_coverage_25/group_std_mean": 0.26654154658317564, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002521091140806675, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002521091140806675, "signal/frontier_coverage_5/centered_abs_mean": 0.20168729424476622, "signal/frontier_coverage_5/group_bin_occupancy": 0.8833333333333332, "signal/frontier_coverage_5/group_std_mean": 0.26654154658317564, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002521091140806675, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002521091140806675, "signal/frontier_ece_reward/centered_abs_mean": 0.06174532324075699, "signal/frontier_ece_reward/group_bin_occupancy": 0.8194444444444444, "signal/frontier_ece_reward/group_std_mean": 0.0827798992395401, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0061745323240756985, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0061745323240756985, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.34277850985527036, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.784375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.41563859581947327, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03427785262465477, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03427785262465477, "step": 65 }, { "calibration/aurc": 0.31782818807649266, "calibration/batch_distribution_entropy": 0.9713867058593466, "calibration/batch_entropy_100bins": 0.9582963574502891, "calibration/batch_entropy_10bins": 0.9713867058593466, "calibration/batch_entropy_50bins": 0.9697426974943962, "calibration/batch_uniqueness": 0.9517092771582559, "calibration/buffer_distribution_entropy": 0.865434024133957, "calibration/buffer_entropy_100bins": 0.8346670094847349, "calibration/buffer_entropy_10bins": 0.865434024133957, "calibration/buffer_entropy_50bins": 0.8684468540776882, "calibration/confidence_entropy": 0.5156155878893331, "calibration/coverage@0%": 0.014809752256277365, "calibration/coverage@1%": 0.014809752256277365, "calibration/coverage@10%": 0.0197959018407649, "calibration/coverage@15%": 0.06292728718017454, "calibration/coverage@20%": 0.14343859835284306, "calibration/coverage@25%": 0.23316434400479813, "calibration/coverage@30%": 0.44321551191720443, "calibration/coverage@5%": 0.01647180211777321, "calibration/ece": 0.15868943196729363, "calibration/mean_confidence": 0.5855180447606176, "calibration/prompt_uniqueness": 0.8879748980284203, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.022395833333333327, "completions/max_length": 3928.4, "completions/max_terminated_length": 3928.4, "completions/mean_length": 928.1142456054688, "completions/mean_terminated_length": 949.37958984375, "completions/min_length": 0.0, "completions/min_terminated_length": 232.8, "epoch": 0.16799790002624967, "grad_norm": 0.0003384539159014821, "learning_rate": 4.156626506024097e-06, "loss": -0.0189, "num_tokens": 150989143.0, "reward": 0.9484674572944641, "reward_std": 0.15824552178382872, "rewards/accuracy_reward": 0.6380208253860473, "rewards/brier_reward": 0.7282225608825683, "rewards/confidence_uniqueness_reward": 0.9304485440254211, "rewards/format_reward": 0.9771701455116272, "rewards/frontier_aurc_reward": -0.0020523122744634747, "rewards/frontier_coverage_0": -0.02554969172924757, "rewards/frontier_coverage_1": -0.02554969172924757, "rewards/frontier_coverage_10": -0.02554969172924757, "rewards/frontier_coverage_15": -0.02554969172924757, "rewards/frontier_coverage_20": -0.02554969172924757, "rewards/frontier_coverage_25": -0.02554969172924757, "rewards/frontier_coverage_5": -0.02554969172924757, "rewards/frontier_ece_reward": 0.012574984692037106, "rewards/frontier_entropy_batch_reward": -0.23991408348083496, "signal/accuracy_reward/centered_abs_mean": 0.17412109375, "signal/accuracy_reward/group_bin_occupancy": 0.20763888888888887, "signal/accuracy_reward/group_std_mean": 0.22977908849716186, "signal/accuracy_reward/group_zero_std_frac": 0.33888890147209166, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.087060546875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.087060546875, "signal/advantage_abs_mean": 0.11712085604667663, "signal/advantage_pre_scale_abs_mean": 0.11712085604667663, "signal/advantage_pre_scale_std": 0.1813085436820984, "signal/advantage_std": 0.1813085436820984, "signal/brier_reward/centered_abs_mean": 0.1955260753631592, "signal/brier_reward/group_bin_occupancy": 0.8989583333333334, "signal/brier_reward/group_std_mean": 0.24234116375446318, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01955260746181011, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01955260746181011, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04565142020583153, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.7760416666666666, "signal/confidence_uniqueness_reward/group_std_mean": 0.07568179368972779, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004565142141655087, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004565142141655087, "signal/format_reward/centered_abs_mean": 0.0373426653444767, "signal/format_reward/group_bin_occupancy": 0.15694444444444444, "signal/format_reward/group_std_mean": 0.06606786623597145, "signal/format_reward/group_zero_std_frac": 0.7444444417953491, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01867133267223835, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01867133267223835, "signal/frontier_aurc_reward/centered_abs_mean": 0.0018319447292014957, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6729166666666666, "signal/frontier_aurc_reward/group_std_mean": 0.003036691714078188, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.289931035193149e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.289931035193149e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.2180047571659088, "signal/frontier_coverage_0/group_bin_occupancy": 0.8857638888888889, "signal/frontier_coverage_0/group_std_mean": 0.2845282912254333, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0027250594459474085, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0027250594459474085, "signal/frontier_coverage_1/centered_abs_mean": 0.2180047571659088, "signal/frontier_coverage_1/group_bin_occupancy": 0.8857638888888889, "signal/frontier_coverage_1/group_std_mean": 0.2845282912254333, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0027250594459474085, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027250594459474085, "signal/frontier_coverage_10/centered_abs_mean": 0.2180047571659088, "signal/frontier_coverage_10/group_bin_occupancy": 0.8857638888888889, "signal/frontier_coverage_10/group_std_mean": 0.2845282912254333, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0027250594459474085, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0027250594459474085, "signal/frontier_coverage_15/centered_abs_mean": 0.2180047571659088, "signal/frontier_coverage_15/group_bin_occupancy": 0.8857638888888889, "signal/frontier_coverage_15/group_std_mean": 0.2845282912254333, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0027250594459474085, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0027250594459474085, "signal/frontier_coverage_20/centered_abs_mean": 0.2180047571659088, "signal/frontier_coverage_20/group_bin_occupancy": 0.8857638888888889, "signal/frontier_coverage_20/group_std_mean": 0.2845282912254333, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0027250594459474085, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0027250594459474085, "signal/frontier_coverage_25/centered_abs_mean": 0.2180047571659088, "signal/frontier_coverage_25/group_bin_occupancy": 0.8857638888888889, "signal/frontier_coverage_25/group_std_mean": 0.2845282912254333, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0027250594459474085, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0027250594459474085, "signal/frontier_coverage_5/centered_abs_mean": 0.2180047571659088, "signal/frontier_coverage_5/group_bin_occupancy": 0.8857638888888889, "signal/frontier_coverage_5/group_std_mean": 0.2845282912254333, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0027250594459474085, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0027250594459474085, "signal/frontier_ece_reward/centered_abs_mean": 0.06324872821569442, "signal/frontier_ece_reward/group_bin_occupancy": 0.8138888888888889, "signal/frontier_ece_reward/group_std_mean": 0.08291356414556503, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0063248731195926665, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0063248731195926665, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3225190699100494, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7753472222222222, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3976904392242432, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03225190676748753, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03225190676748753, "step": 70 }, { "calibration/aurc": 0.23721066123861628, "calibration/batch_distribution_entropy": 0.9523450697736399, "calibration/batch_entropy_100bins": 0.9462079878353921, "calibration/batch_entropy_10bins": 0.9523450697736399, "calibration/batch_entropy_50bins": 0.9576539125810644, "calibration/batch_uniqueness": 0.9472363243630035, "calibration/buffer_distribution_entropy": 0.8763054419438709, "calibration/buffer_entropy_100bins": 0.8520378003159907, "calibration/buffer_entropy_10bins": 0.8763054419438709, "calibration/buffer_entropy_50bins": 0.882087051862643, "calibration/confidence_entropy": 0.5010399725323659, "calibration/coverage@0%": 0.0074014091601529945, "calibration/coverage@1%": 0.0074014091601529945, "calibration/coverage@10%": 0.3165677151318652, "calibration/coverage@15%": 0.3818958107864078, "calibration/coverage@20%": 0.4402204226199299, "calibration/coverage@25%": 0.5509094875204943, "calibration/coverage@30%": 0.7209270657434209, "calibration/coverage@5%": 0.09826302795910861, "calibration/ece": 0.1836193855088879, "calibration/mean_confidence": 0.6173291182052798, "calibration/prompt_uniqueness": 0.8779982358091448, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014236111111111093, "completions/max_length": 3881.4, "completions/max_terminated_length": 3881.4, "completions/mean_length": 888.530126953125, "completions/mean_terminated_length": 901.3885498046875, "completions/min_length": 0.0, "completions/min_terminated_length": 272.0, "epoch": 0.17999775002812465, "grad_norm": 0.0003190806892234832, "learning_rate": 4.006024096385543e-06, "loss": -0.0115, "num_tokens": 164289906.0, "reward": 0.9812763333320618, "reward_std": 0.1455370843410492, "rewards/accuracy_reward": 0.6936632037162781, "rewards/brier_reward": 0.7690490484237671, "rewards/confidence_uniqueness_reward": 0.9352475523948669, "rewards/format_reward": 0.9855034828186036, "rewards/frontier_aurc_reward": -0.0016912945546209812, "rewards/frontier_coverage_0": -0.023174118530005217, "rewards/frontier_coverage_1": -0.023174118530005217, "rewards/frontier_coverage_10": -0.023174118530005217, "rewards/frontier_coverage_15": -0.023174118530005217, "rewards/frontier_coverage_20": -0.023174118530005217, "rewards/frontier_coverage_25": -0.023174118530005217, "rewards/frontier_coverage_5": -0.023174118530005217, "rewards/frontier_ece_reward": 0.02147761546075344, "rewards/frontier_entropy_batch_reward": -0.2883553385734558, "signal/accuracy_reward/centered_abs_mean": 0.17032877504825591, "signal/accuracy_reward/group_bin_occupancy": 0.20451388888888888, "signal/accuracy_reward/group_std_mean": 0.2250364065170288, "signal/accuracy_reward/group_zero_std_frac": 0.36388888359069826, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08516438752412796, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08516438752412796, "signal/advantage_abs_mean": 0.10705152153968811, "signal/advantage_pre_scale_abs_mean": 0.10705152153968811, "signal/advantage_pre_scale_std": 0.17231981456279755, "signal/advantage_std": 0.17231981456279755, "signal/brier_reward/centered_abs_mean": 0.1728837013244629, "signal/brier_reward/group_bin_occupancy": 0.8597222222222222, "signal/brier_reward/group_std_mean": 0.21893222033977508, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017288370057940483, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.017288370057940483, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03745027519762516, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8152777777777779, "signal/confidence_uniqueness_reward/group_std_mean": 0.06325004473328591, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0037450275383889677, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0037450275383889677, "signal/format_reward/centered_abs_mean": 0.02566731758415699, "signal/format_reward/group_bin_occupancy": 0.15069444444444444, "signal/format_reward/group_std_mean": 0.04961967393755913, "signal/format_reward/group_zero_std_frac": 0.7944444417953491, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.012833658792078494, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.012833658792078494, "signal/frontier_aurc_reward/centered_abs_mean": 0.0017779430374503136, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6729166666666666, "signal/frontier_aurc_reward/group_std_mean": 0.002878274582326412, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.2224288841243833e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.2224288841243833e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.19454073309898376, "signal/frontier_coverage_0/group_bin_occupancy": 0.85625, "signal/frontier_coverage_0/group_std_mean": 0.26065097451210023, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002431759191676974, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002431759191676974, "signal/frontier_coverage_1/centered_abs_mean": 0.19454073309898376, "signal/frontier_coverage_1/group_bin_occupancy": 0.85625, "signal/frontier_coverage_1/group_std_mean": 0.26065097451210023, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002431759191676974, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002431759191676974, "signal/frontier_coverage_10/centered_abs_mean": 0.19454073309898376, "signal/frontier_coverage_10/group_bin_occupancy": 0.85625, "signal/frontier_coverage_10/group_std_mean": 0.26065097451210023, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002431759191676974, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002431759191676974, "signal/frontier_coverage_15/centered_abs_mean": 0.19454073309898376, "signal/frontier_coverage_15/group_bin_occupancy": 0.85625, "signal/frontier_coverage_15/group_std_mean": 0.26065097451210023, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002431759191676974, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002431759191676974, "signal/frontier_coverage_20/centered_abs_mean": 0.19454073309898376, "signal/frontier_coverage_20/group_bin_occupancy": 0.85625, "signal/frontier_coverage_20/group_std_mean": 0.26065097451210023, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002431759191676974, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002431759191676974, "signal/frontier_coverage_25/centered_abs_mean": 0.19454073309898376, "signal/frontier_coverage_25/group_bin_occupancy": 0.85625, "signal/frontier_coverage_25/group_std_mean": 0.26065097451210023, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002431759191676974, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002431759191676974, "signal/frontier_coverage_5/centered_abs_mean": 0.19454073309898376, "signal/frontier_coverage_5/group_bin_occupancy": 0.85625, "signal/frontier_coverage_5/group_std_mean": 0.26065097451210023, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002431759191676974, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002431759191676974, "signal/frontier_ece_reward/centered_abs_mean": 0.06184743866324425, "signal/frontier_ece_reward/group_bin_occupancy": 0.7597222222222222, "signal/frontier_ece_reward/group_std_mean": 0.07970805168151855, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006184743903577328, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006184743903577328, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.34849226474761963, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7798611111111111, "signal/frontier_entropy_batch_reward/group_std_mean": 0.41787471175193786, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.034849225729703906, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.034849225729703906, "step": 75 }, { "calibration/aurc": 0.2092779274411888, "calibration/batch_distribution_entropy": 0.9617838823741687, "calibration/batch_entropy_100bins": 0.950011756254557, "calibration/batch_entropy_10bins": 0.9617838823741687, "calibration/batch_entropy_50bins": 0.9600976364704195, "calibration/batch_uniqueness": 0.9488157255909364, "calibration/buffer_distribution_entropy": 0.8853217697278936, "calibration/buffer_entropy_100bins": 0.8662557301452016, "calibration/buffer_entropy_10bins": 0.8853217697278936, "calibration/buffer_entropy_50bins": 0.8928601468050562, "calibration/confidence_entropy": 0.4877562323287636, "calibration/coverage@0%": 0.026958699293814493, "calibration/coverage@1%": 0.026958699293814493, "calibration/coverage@10%": 0.26216189051794814, "calibration/coverage@15%": 0.533117630289133, "calibration/coverage@20%": 0.5696959461704593, "calibration/coverage@25%": 0.6466096767515254, "calibration/coverage@30%": 0.7402521406135708, "calibration/coverage@5%": 0.0507682231033383, "calibration/ece": 0.20460225649868646, "calibration/mean_confidence": 0.5915577899032075, "calibration/prompt_uniqueness": 0.866834247942902, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01684027777777779, "completions/max_length": 3994.8, "completions/max_terminated_length": 3994.8, "completions/mean_length": 904.8212768554688, "completions/mean_terminated_length": 920.4162353515625, "completions/min_length": 0.0, "completions/min_terminated_length": 252.8, "epoch": 0.19199760002999963, "grad_norm": 0.00035350897815078497, "learning_rate": 3.855421686746989e-06, "loss": -0.0139, "num_tokens": 177766727.0, "reward": 0.9613214373588562, "reward_std": 0.1463531583547592, "rewards/accuracy_reward": 0.6480034708976745, "rewards/brier_reward": 0.7514761567115784, "rewards/confidence_uniqueness_reward": 0.9352750539779663, "rewards/format_reward": 0.9829861164093018, "rewards/frontier_aurc_reward": -0.001857876474969089, "rewards/frontier_coverage_0": -0.00018857438117265702, "rewards/frontier_coverage_1": -0.00018857438117265702, "rewards/frontier_coverage_10": -0.00018857438117265702, "rewards/frontier_coverage_15": -0.00018857438117265702, "rewards/frontier_coverage_20": -0.00018857438117265702, "rewards/frontier_coverage_25": -0.00018857438117265702, "rewards/frontier_coverage_5": -0.00018857438117265702, "rewards/frontier_ece_reward": 0.018000571243464946, "rewards/frontier_entropy_batch_reward": -0.24608825147151947, "signal/accuracy_reward/centered_abs_mean": 0.17750108242034912, "signal/accuracy_reward/group_bin_occupancy": 0.20868055555555554, "signal/accuracy_reward/group_std_mean": 0.2343847006559372, "signal/accuracy_reward/group_zero_std_frac": 0.3305555582046509, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08875054121017456, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08875054121017456, "signal/advantage_abs_mean": 0.10952122509479523, "signal/advantage_pre_scale_abs_mean": 0.10952122509479523, "signal/advantage_pre_scale_std": 0.16926259696483612, "signal/advantage_std": 0.16926259696483612, "signal/brier_reward/centered_abs_mean": 0.17829698026180268, "signal/brier_reward/group_bin_occupancy": 0.8697916666666667, "signal/brier_reward/group_std_mean": 0.22418507933616638, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01782969757914543, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01782969757914543, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03583449199795723, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8104166666666666, "signal/confidence_uniqueness_reward/group_std_mean": 0.05993206053972244, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0035834492649883033, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0035834492649883033, "signal/format_reward/centered_abs_mean": 0.02592230886220932, "signal/format_reward/group_bin_occupancy": 0.15034722222222222, "signal/format_reward/group_std_mean": 0.048324061557650566, "signal/format_reward/group_zero_std_frac": 0.7972222328186035, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01296115443110466, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01296115443110466, "signal/frontier_aurc_reward/centered_abs_mean": 0.0017847379669547081, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6881944444444444, "signal/frontier_aurc_reward/group_std_mean": 0.0027909183874726294, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.2309225460048766e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.2309225460048766e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.2202287882566452, "signal/frontier_coverage_0/group_bin_occupancy": 0.8597222222222222, "signal/frontier_coverage_0/group_std_mean": 0.2893898367881775, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0027528597973287107, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0027528597973287107, "signal/frontier_coverage_1/centered_abs_mean": 0.2202287882566452, "signal/frontier_coverage_1/group_bin_occupancy": 0.8597222222222222, "signal/frontier_coverage_1/group_std_mean": 0.2893898367881775, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0027528597973287107, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027528597973287107, "signal/frontier_coverage_10/centered_abs_mean": 0.2202287882566452, "signal/frontier_coverage_10/group_bin_occupancy": 0.8597222222222222, "signal/frontier_coverage_10/group_std_mean": 0.2893898367881775, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0027528597973287107, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0027528597973287107, "signal/frontier_coverage_15/centered_abs_mean": 0.2202287882566452, "signal/frontier_coverage_15/group_bin_occupancy": 0.8597222222222222, "signal/frontier_coverage_15/group_std_mean": 0.2893898367881775, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0027528597973287107, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0027528597973287107, "signal/frontier_coverage_20/centered_abs_mean": 0.2202287882566452, "signal/frontier_coverage_20/group_bin_occupancy": 0.8597222222222222, "signal/frontier_coverage_20/group_std_mean": 0.2893898367881775, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0027528597973287107, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0027528597973287107, "signal/frontier_coverage_25/centered_abs_mean": 0.2202287882566452, "signal/frontier_coverage_25/group_bin_occupancy": 0.8597222222222222, "signal/frontier_coverage_25/group_std_mean": 0.2893898367881775, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0027528597973287107, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0027528597973287107, "signal/frontier_coverage_5/centered_abs_mean": 0.2202287882566452, "signal/frontier_coverage_5/group_bin_occupancy": 0.8597222222222222, "signal/frontier_coverage_5/group_std_mean": 0.2893898367881775, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0027528597973287107, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0027528597973287107, "signal/frontier_ece_reward/centered_abs_mean": 0.059304585307836534, "signal/frontier_ece_reward/group_bin_occupancy": 0.7579861111111111, "signal/frontier_ece_reward/group_std_mean": 0.07622785717248917, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005930458568036557, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005930458568036557, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3187947154045105, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.773611111111111, "signal/frontier_entropy_batch_reward/group_std_mean": 0.39364359378814695, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03187947124242783, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03187947124242783, "step": 80 }, { "calibration/aurc": 0.20534175522924727, "calibration/batch_distribution_entropy": 0.969917266218302, "calibration/batch_entropy_100bins": 0.952227152312498, "calibration/batch_entropy_10bins": 0.969917266218302, "calibration/batch_entropy_50bins": 0.9629528550247466, "calibration/batch_uniqueness": 0.9497679294785953, "calibration/buffer_distribution_entropy": 0.8954276213227133, "calibration/buffer_entropy_100bins": 0.8788468049639422, "calibration/buffer_entropy_10bins": 0.8954276213227133, "calibration/buffer_entropy_50bins": 0.9029583142175056, "calibration/confidence_entropy": 0.47593418558783557, "calibration/coverage@0%": 0.01591605755730271, "calibration/coverage@1%": 0.01591605755730271, "calibration/coverage@10%": 0.13208137867099776, "calibration/coverage@15%": 0.39615967842103883, "calibration/coverage@20%": 0.5639172130983358, "calibration/coverage@25%": 0.7288010871896423, "calibration/coverage@30%": 0.8382724713232088, "calibration/coverage@5%": 0.020728891781901636, "calibration/ece": 0.15006383558851266, "calibration/mean_confidence": 0.5456087756371643, "calibration/prompt_uniqueness": 0.8688496853362396, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01293402777777779, "completions/max_length": 3779.0, "completions/max_terminated_length": 3779.0, "completions/mean_length": 907.5528686523437, "completions/mean_terminated_length": 919.5334716796875, "completions/min_length": 0.0, "completions/min_terminated_length": 307.2, "epoch": 0.2039974500318746, "grad_norm": 0.00032232736703008413, "learning_rate": 3.7048192771084342e-06, "loss": -0.0112, "num_tokens": 191308936.0, "reward": 0.9826178789138794, "reward_std": 0.14128702878952026, "rewards/accuracy_reward": 0.6888020753860473, "rewards/brier_reward": 0.7758087396621705, "rewards/confidence_uniqueness_reward": 0.9368537425994873, "rewards/format_reward": 0.986718761920929, "rewards/frontier_aurc_reward": -0.0015183656942099333, "rewards/frontier_coverage_0": -0.006013031769543886, "rewards/frontier_coverage_1": -0.006013031769543886, "rewards/frontier_coverage_10": -0.006013031769543886, "rewards/frontier_coverage_15": -0.006013031769543886, "rewards/frontier_coverage_20": -0.006013031769543886, "rewards/frontier_coverage_25": -0.006013031769543886, "rewards/frontier_coverage_5": -0.006013031769543886, "rewards/frontier_ece_reward": 0.022591342404484748, "rewards/frontier_entropy_batch_reward": -0.2812282383441925, "signal/accuracy_reward/centered_abs_mean": 0.17081705629825591, "signal/accuracy_reward/group_bin_occupancy": 0.20590277777777777, "signal/accuracy_reward/group_std_mean": 0.2259347140789032, "signal/accuracy_reward/group_zero_std_frac": 0.35277777910232544, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08540852814912796, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08540852814912796, "signal/advantage_abs_mean": 0.10153568834066391, "signal/advantage_pre_scale_abs_mean": 0.10153568834066391, "signal/advantage_pre_scale_std": 0.16390545070171356, "signal/advantage_std": 0.16390545070171356, "signal/brier_reward/centered_abs_mean": 0.1676138609647751, "signal/brier_reward/group_bin_occupancy": 0.8409722222222221, "signal/brier_reward/group_std_mean": 0.21286478340625764, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016761386021971702, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.016761386021971702, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0346285417675972, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.7895833333333333, "signal/confidence_uniqueness_reward/group_std_mean": 0.0630292072892189, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003462854353711009, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003462854353711009, "signal/format_reward/centered_abs_mean": 0.02394205704331398, "signal/format_reward/group_bin_occupancy": 0.15381944444444445, "signal/format_reward/group_std_mean": 0.050843673199415206, "signal/format_reward/group_zero_std_frac": 0.7694444417953491, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01197102852165699, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01197102852165699, "signal/frontier_aurc_reward/centered_abs_mean": 0.0016425102250650526, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6753472222222222, "signal/frontier_aurc_reward/group_std_mean": 0.0026286729145795105, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.053137832263019e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.053137832263019e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.21044284403324126, "signal/frontier_coverage_0/group_bin_occupancy": 0.8260416666666668, "signal/frontier_coverage_0/group_std_mean": 0.2772384166717529, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0026305356062948705, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0026305356062948705, "signal/frontier_coverage_1/centered_abs_mean": 0.21044284403324126, "signal/frontier_coverage_1/group_bin_occupancy": 0.8260416666666668, "signal/frontier_coverage_1/group_std_mean": 0.2772384166717529, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026305356062948705, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026305356062948705, "signal/frontier_coverage_10/centered_abs_mean": 0.21044284403324126, "signal/frontier_coverage_10/group_bin_occupancy": 0.8260416666666668, "signal/frontier_coverage_10/group_std_mean": 0.2772384166717529, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026305356062948705, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026305356062948705, "signal/frontier_coverage_15/centered_abs_mean": 0.21044284403324126, "signal/frontier_coverage_15/group_bin_occupancy": 0.8260416666666668, "signal/frontier_coverage_15/group_std_mean": 0.2772384166717529, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0026305356062948705, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026305356062948705, "signal/frontier_coverage_20/centered_abs_mean": 0.21044284403324126, "signal/frontier_coverage_20/group_bin_occupancy": 0.8260416666666668, "signal/frontier_coverage_20/group_std_mean": 0.2772384166717529, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0026305356062948705, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0026305356062948705, "signal/frontier_coverage_25/centered_abs_mean": 0.21044284403324126, "signal/frontier_coverage_25/group_bin_occupancy": 0.8260416666666668, "signal/frontier_coverage_25/group_std_mean": 0.2772384166717529, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0026305356062948705, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0026305356062948705, "signal/frontier_coverage_5/centered_abs_mean": 0.21044284403324126, "signal/frontier_coverage_5/group_bin_occupancy": 0.8260416666666668, "signal/frontier_coverage_5/group_std_mean": 0.2772384166717529, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026305356062948705, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026305356062948705, "signal/frontier_ece_reward/centered_abs_mean": 0.05771494954824448, "signal/frontier_ece_reward/group_bin_occupancy": 0.7440972222222222, "signal/frontier_ece_reward/group_std_mean": 0.07275837063789367, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005771494936197996, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005771494936197996, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3331539690494537, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7829861111111112, "signal/frontier_entropy_batch_reward/group_std_mean": 0.40554880499839785, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0333153985440731, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0333153985440731, "step": 85 }, { "calibration/aurc": 0.14146831607507013, "calibration/batch_distribution_entropy": 0.9788194244148739, "calibration/batch_entropy_100bins": 0.9608438585211108, "calibration/batch_entropy_10bins": 0.9788194244148739, "calibration/batch_entropy_50bins": 0.973365189284614, "calibration/batch_uniqueness": 0.9523141972909981, "calibration/buffer_distribution_entropy": 0.9043309546471272, "calibration/buffer_entropy_100bins": 0.8901910730938212, "calibration/buffer_entropy_10bins": 0.9043309546471272, "calibration/buffer_entropy_50bins": 0.9122256900315284, "calibration/confidence_entropy": 0.4913180920528314, "calibration/coverage@0%": 0.0627593206678165, "calibration/coverage@1%": 0.11067598733448314, "calibration/coverage@10%": 0.41181837997686416, "calibration/coverage@15%": 0.6135317774283345, "calibration/coverage@20%": 0.7221224409632029, "calibration/coverage@25%": 0.8329313631163793, "calibration/coverage@30%": 0.9034970794433189, "calibration/coverage@5%": 0.2677955394134996, "calibration/ece": 0.18102057753858167, "calibration/mean_confidence": 0.5288751475055655, "calibration/prompt_uniqueness": 0.8661224631367853, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012586805555555558, "completions/max_length": 3679.8, "completions/max_terminated_length": 3679.8, "completions/mean_length": 900.6781494140625, "completions/mean_terminated_length": 912.1487060546875, "completions/min_length": 0.0, "completions/min_terminated_length": 268.2, "epoch": 0.2159973000337496, "grad_norm": 0.00032432310399599373, "learning_rate": 3.5542168674698798e-06, "loss": -0.0095, "num_tokens": 204753420.0, "reward": 0.9828698992729187, "reward_std": 0.13062580227851867, "rewards/accuracy_reward": 0.6809027671813965, "rewards/brier_reward": 0.7742850184440613, "rewards/confidence_uniqueness_reward": 0.9397097945213317, "rewards/format_reward": 0.9869791746139527, "rewards/frontier_aurc_reward": -0.0012651005061343312, "rewards/frontier_coverage_0": -0.0024590507615357637, "rewards/frontier_coverage_1": -0.0024590507615357637, "rewards/frontier_coverage_10": -0.0024590507615357637, "rewards/frontier_coverage_15": -0.0024590507615357637, "rewards/frontier_coverage_20": -0.0024590507615357637, "rewards/frontier_coverage_25": -0.0024590507615357637, "rewards/frontier_coverage_5": -0.0024590507615357637, "rewards/frontier_ece_reward": 0.01990157924592495, "rewards/frontier_entropy_batch_reward": -0.2422977238893509, "signal/accuracy_reward/centered_abs_mean": 0.1625976547598839, "signal/accuracy_reward/group_bin_occupancy": 0.2, "signal/accuracy_reward/group_std_mean": 0.21232767403125763, "signal/accuracy_reward/group_zero_std_frac": 0.4000000059604645, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08129882737994194, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08129882737994194, "signal/advantage_abs_mean": 0.0974230170249939, "signal/advantage_pre_scale_abs_mean": 0.0974230170249939, "signal/advantage_pre_scale_std": 0.1547566443681717, "signal/advantage_std": 0.1547566443681717, "signal/brier_reward/centered_abs_mean": 0.16137183904647828, "signal/brier_reward/group_bin_occupancy": 0.8625, "signal/brier_reward/group_std_mean": 0.20378568768501282, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01613718457520008, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01613718457520008, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03156536892056465, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.835763888888889, "signal/confidence_uniqueness_reward/group_std_mean": 0.05195377618074417, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003156536910682917, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003156536910682917, "signal/format_reward/centered_abs_mean": 0.02133246473968029, "signal/format_reward/group_bin_occupancy": 0.14513888888888887, "signal/format_reward/group_std_mean": 0.03966722339391708, "signal/format_reward/group_zero_std_frac": 0.8388888835906982, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.010666232369840146, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.010666232369840146, "signal/frontier_aurc_reward/centered_abs_mean": 0.0012749084737151862, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6826388888888889, "signal/frontier_aurc_reward/group_std_mean": 0.0020892760483548047, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.593635715835262e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.593635715835262e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.22104499042034148, "signal/frontier_coverage_0/group_bin_occupancy": 0.8520833333333332, "signal/frontier_coverage_0/group_std_mean": 0.2860799193382263, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0027630624361336233, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0027630624361336233, "signal/frontier_coverage_1/centered_abs_mean": 0.22104499042034148, "signal/frontier_coverage_1/group_bin_occupancy": 0.8520833333333332, "signal/frontier_coverage_1/group_std_mean": 0.2860799193382263, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0027630624361336233, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027630624361336233, "signal/frontier_coverage_10/centered_abs_mean": 0.22104499042034148, "signal/frontier_coverage_10/group_bin_occupancy": 0.8520833333333332, "signal/frontier_coverage_10/group_std_mean": 0.2860799193382263, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0027630624361336233, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0027630624361336233, "signal/frontier_coverage_15/centered_abs_mean": 0.22104499042034148, "signal/frontier_coverage_15/group_bin_occupancy": 0.8520833333333332, "signal/frontier_coverage_15/group_std_mean": 0.2860799193382263, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0027630624361336233, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0027630624361336233, "signal/frontier_coverage_20/centered_abs_mean": 0.22104499042034148, "signal/frontier_coverage_20/group_bin_occupancy": 0.8520833333333332, "signal/frontier_coverage_20/group_std_mean": 0.2860799193382263, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0027630624361336233, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0027630624361336233, "signal/frontier_coverage_25/centered_abs_mean": 0.22104499042034148, "signal/frontier_coverage_25/group_bin_occupancy": 0.8520833333333332, "signal/frontier_coverage_25/group_std_mean": 0.2860799193382263, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0027630624361336233, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0027630624361336233, "signal/frontier_coverage_5/centered_abs_mean": 0.22104499042034148, "signal/frontier_coverage_5/group_bin_occupancy": 0.8520833333333332, "signal/frontier_coverage_5/group_std_mean": 0.2860799193382263, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0027630624361336233, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0027630624361336233, "signal/frontier_ece_reward/centered_abs_mean": 0.05326760783791542, "signal/frontier_ece_reward/group_bin_occupancy": 0.7274305555555556, "signal/frontier_ece_reward/group_std_mean": 0.06732185631990432, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005326761025935411, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005326761025935411, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31390817165374757, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7652777777777778, "signal/frontier_entropy_batch_reward/group_std_mean": 0.38645762801170347, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03139082007110119, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03139082007110119, "step": 90 }, { "calibration/aurc": 0.16972575624076253, "calibration/batch_distribution_entropy": 0.968708935722565, "calibration/batch_entropy_100bins": 0.9597854119619518, "calibration/batch_entropy_10bins": 0.968708935722565, "calibration/batch_entropy_50bins": 0.9676675002930907, "calibration/batch_uniqueness": 0.9505702036499282, "calibration/buffer_distribution_entropy": 0.9138201935329576, "calibration/buffer_entropy_100bins": 0.9005677377182005, "calibration/buffer_entropy_10bins": 0.9138201935329576, "calibration/buffer_entropy_50bins": 0.9210036485318283, "calibration/confidence_entropy": 0.49206090349416975, "calibration/coverage@0%": 0.03164220991292743, "calibration/coverage@1%": 0.03164220991292743, "calibration/coverage@10%": 0.49474073715796, "calibration/coverage@15%": 0.6049408285432979, "calibration/coverage@20%": 0.6640028793343454, "calibration/coverage@25%": 0.712034532958265, "calibration/coverage@30%": 0.7631684293337407, "calibration/coverage@5%": 0.2814550861583458, "calibration/ece": 0.1706881800323085, "calibration/mean_confidence": 0.5701669649175536, "calibration/prompt_uniqueness": 0.8606523882209853, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011805555555555559, "completions/max_length": 3808.6, "completions/max_terminated_length": 3808.6, "completions/mean_length": 934.8341186523437, "completions/mean_terminated_length": 946.0338989257813, "completions/min_length": 0.0, "completions/min_terminated_length": 274.8, "epoch": 0.22799715003562457, "grad_norm": 0.0003469188523013145, "learning_rate": 3.4036144578313257e-06, "loss": -0.0099, "num_tokens": 218614389.0, "reward": 0.9796857237815857, "reward_std": 0.13000356405973434, "rewards/accuracy_reward": 0.6736979126930237, "rewards/brier_reward": 0.7816197514533997, "rewards/confidence_uniqueness_reward": 0.9394507527351379, "rewards/format_reward": 0.9880208253860474, "rewards/frontier_aurc_reward": -0.0013940044911578298, "rewards/frontier_coverage_0": 0.0035892575513571503, "rewards/frontier_coverage_1": 0.0035892575513571503, "rewards/frontier_coverage_10": 0.0035892575513571503, "rewards/frontier_coverage_15": 0.0035892575513571503, "rewards/frontier_coverage_20": 0.0035892575513571503, "rewards/frontier_coverage_25": 0.0035892575513571503, "rewards/frontier_coverage_5": 0.0035892575513571503, "rewards/frontier_ece_reward": 0.02155197449028492, "rewards/frontier_entropy_batch_reward": -0.25732521414756776, "signal/accuracy_reward/centered_abs_mean": 0.15018988847732545, "signal/accuracy_reward/group_bin_occupancy": 0.19826388888888888, "signal/accuracy_reward/group_std_mean": 0.20334359407424926, "signal/accuracy_reward/group_zero_std_frac": 0.4138889014720917, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07509494423866273, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07509494423866273, "signal/advantage_abs_mean": 0.09411217570304871, "signal/advantage_pre_scale_abs_mean": 0.09411217570304871, "signal/advantage_pre_scale_std": 0.15418358743190766, "signal/advantage_std": 0.15418358743190766, "signal/brier_reward/centered_abs_mean": 0.14829140901565552, "signal/brier_reward/group_bin_occupancy": 0.8458333333333334, "signal/brier_reward/group_std_mean": 0.18972561955451966, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014829141087830067, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014829141087830067, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.030963774770498276, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8368055555555556, "signal/confidence_uniqueness_reward/group_std_mean": 0.05300363451242447, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003096377523615956, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003096377523615956, "signal/format_reward/centered_abs_mean": 0.02012803815305233, "signal/format_reward/group_bin_occupancy": 0.14652777777777778, "signal/format_reward/group_std_mean": 0.04024533927440643, "signal/format_reward/group_zero_std_frac": 0.8277777791023254, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.010064019076526164, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.010064019076526164, "signal/frontier_aurc_reward/centered_abs_mean": 0.001483507757075131, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6753472222222222, "signal/frontier_aurc_reward/group_std_mean": 0.0023828324396163226, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.8543847727414686e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.8543847727414686e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.1866928219795227, "signal/frontier_coverage_0/group_bin_occupancy": 0.8479166666666667, "signal/frontier_coverage_0/group_std_mean": 0.24621494710445405, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0023336603306233885, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023336603306233885, "signal/frontier_coverage_1/centered_abs_mean": 0.1866928219795227, "signal/frontier_coverage_1/group_bin_occupancy": 0.8479166666666667, "signal/frontier_coverage_1/group_std_mean": 0.24621494710445405, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023336603306233885, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023336603306233885, "signal/frontier_coverage_10/centered_abs_mean": 0.1866928219795227, "signal/frontier_coverage_10/group_bin_occupancy": 0.8479166666666667, "signal/frontier_coverage_10/group_std_mean": 0.24621494710445405, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023336603306233885, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023336603306233885, "signal/frontier_coverage_15/centered_abs_mean": 0.1866928219795227, "signal/frontier_coverage_15/group_bin_occupancy": 0.8479166666666667, "signal/frontier_coverage_15/group_std_mean": 0.24621494710445405, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023336603306233885, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023336603306233885, "signal/frontier_coverage_20/centered_abs_mean": 0.1866928219795227, "signal/frontier_coverage_20/group_bin_occupancy": 0.8479166666666667, "signal/frontier_coverage_20/group_std_mean": 0.24621494710445405, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0023336603306233885, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0023336603306233885, "signal/frontier_coverage_25/centered_abs_mean": 0.1866928219795227, "signal/frontier_coverage_25/group_bin_occupancy": 0.8479166666666667, "signal/frontier_coverage_25/group_std_mean": 0.24621494710445405, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0023336603306233885, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0023336603306233885, "signal/frontier_coverage_5/centered_abs_mean": 0.1866928219795227, "signal/frontier_coverage_5/group_bin_occupancy": 0.8479166666666667, "signal/frontier_coverage_5/group_std_mean": 0.24621494710445405, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023336603306233885, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023336603306233885, "signal/frontier_ece_reward/centered_abs_mean": 0.049095044285058974, "signal/frontier_ece_reward/group_bin_occupancy": 0.7041666666666667, "signal/frontier_ece_reward/group_std_mean": 0.062035161256790164, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00490950457751751, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00490950457751751, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31598699688911436, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7607638888888889, "signal/frontier_entropy_batch_reward/group_std_mean": 0.38732577562332154, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03159870021045208, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03159870021045208, "step": 95 }, { "calibration/aurc": 0.1468086902819562, "calibration/batch_distribution_entropy": 0.9644228229125108, "calibration/batch_entropy_100bins": 0.951640068404739, "calibration/batch_entropy_10bins": 0.9644228229125108, "calibration/batch_entropy_50bins": 0.9639704299471431, "calibration/batch_uniqueness": 0.9486976020018496, "calibration/buffer_distribution_entropy": 0.9181791650650444, "calibration/buffer_entropy_100bins": 0.9078529948938329, "calibration/buffer_entropy_10bins": 0.9181791650650444, "calibration/buffer_entropy_50bins": 0.9265357528580778, "calibration/confidence_entropy": 0.48844466923525676, "calibration/coverage@0%": 0.059730258076577517, "calibration/coverage@1%": 0.059730258076577517, "calibration/coverage@10%": 0.44518814058922684, "calibration/coverage@15%": 0.5958507215879982, "calibration/coverage@20%": 0.7796840966967775, "calibration/coverage@25%": 0.868777681347203, "calibration/coverage@30%": 0.9167861409796894, "calibration/coverage@5%": 0.14907013331665558, "calibration/ece": 0.15105889999404112, "calibration/mean_confidence": 0.5653978757621623, "calibration/prompt_uniqueness": 0.865446176856419, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017013888888888884, "completions/max_length": 3786.0, "completions/max_terminated_length": 3786.0, "completions/mean_length": 975.0357666015625, "completions/mean_terminated_length": 991.8107543945313, "completions/min_length": 0.0, "completions/min_terminated_length": 312.8, "epoch": 0.23999700003749952, "grad_norm": 0.0003270139859523624, "learning_rate": 3.2530120481927713e-06, "loss": -0.0123, "num_tokens": 232945873.0, "reward": 0.9823734283447265, "reward_std": 0.1380382299423218, "rewards/accuracy_reward": 0.6821180582046509, "rewards/brier_reward": 0.8005537033081055, "rewards/confidence_uniqueness_reward": 0.9328471422195435, "rewards/format_reward": 0.982812511920929, "rewards/frontier_aurc_reward": -0.001223186100833118, "rewards/frontier_coverage_0": 0.022282357234507798, "rewards/frontier_coverage_1": 0.022282357234507798, "rewards/frontier_coverage_10": 0.022282357234507798, "rewards/frontier_coverage_15": 0.022282357234507798, "rewards/frontier_coverage_20": 0.022282357234507798, "rewards/frontier_coverage_25": 0.022282357234507798, "rewards/frontier_coverage_5": 0.022282357234507798, "rewards/frontier_ece_reward": 0.024563415348529814, "rewards/frontier_entropy_batch_reward": -0.2782270163297653, "signal/accuracy_reward/centered_abs_mean": 0.16501736044883727, "signal/accuracy_reward/group_bin_occupancy": 0.20208333333333334, "signal/accuracy_reward/group_std_mean": 0.21692525148391723, "signal/accuracy_reward/group_zero_std_frac": 0.38333333730697633, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08250868022441864, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08250868022441864, "signal/advantage_abs_mean": 0.1007988765835762, "signal/advantage_pre_scale_abs_mean": 0.1007988765835762, "signal/advantage_pre_scale_std": 0.16335872411727906, "signal/advantage_std": 0.16335872411727906, "signal/brier_reward/centered_abs_mean": 0.142218279838562, "signal/brier_reward/group_bin_occupancy": 0.820486111111111, "signal/brier_reward/group_std_mean": 0.18417510092258454, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014221827685832977, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014221827685832977, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03697417117655277, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8086805555555555, "signal/confidence_uniqueness_reward/group_std_mean": 0.06256948933005332, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003697417164221406, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003697417164221406, "signal/format_reward/centered_abs_mean": 0.02630208320915699, "signal/format_reward/group_bin_occupancy": 0.15069444444444446, "signal/format_reward/group_std_mean": 0.05004433616995811, "signal/format_reward/group_zero_std_frac": 0.794444453716278, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.013151041604578495, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.013151041604578495, "signal/frontier_aurc_reward/centered_abs_mean": 0.0013734675711020827, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6788194444444444, "signal/frontier_aurc_reward/group_std_mean": 0.0022251688642427325, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.7168344675155824e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.7168344675155824e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.18763003647327423, "signal/frontier_coverage_0/group_bin_occupancy": 0.8336805555555555, "signal/frontier_coverage_0/group_std_mean": 0.2453687906265259, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0023453754372894766, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023453754372894766, "signal/frontier_coverage_1/centered_abs_mean": 0.18763003647327423, "signal/frontier_coverage_1/group_bin_occupancy": 0.8336805555555555, "signal/frontier_coverage_1/group_std_mean": 0.2453687906265259, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023453754372894766, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023453754372894766, "signal/frontier_coverage_10/centered_abs_mean": 0.18763003647327423, "signal/frontier_coverage_10/group_bin_occupancy": 0.8336805555555555, "signal/frontier_coverage_10/group_std_mean": 0.2453687906265259, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023453754372894766, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023453754372894766, "signal/frontier_coverage_15/centered_abs_mean": 0.18763003647327423, "signal/frontier_coverage_15/group_bin_occupancy": 0.8336805555555555, "signal/frontier_coverage_15/group_std_mean": 0.2453687906265259, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023453754372894766, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023453754372894766, "signal/frontier_coverage_20/centered_abs_mean": 0.18763003647327423, "signal/frontier_coverage_20/group_bin_occupancy": 0.8336805555555555, "signal/frontier_coverage_20/group_std_mean": 0.2453687906265259, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0023453754372894766, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0023453754372894766, "signal/frontier_coverage_25/centered_abs_mean": 0.18763003647327423, "signal/frontier_coverage_25/group_bin_occupancy": 0.8336805555555555, "signal/frontier_coverage_25/group_std_mean": 0.2453687906265259, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0023453754372894766, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0023453754372894766, "signal/frontier_coverage_5/centered_abs_mean": 0.18763003647327423, "signal/frontier_coverage_5/group_bin_occupancy": 0.8336805555555555, "signal/frontier_coverage_5/group_std_mean": 0.2453687906265259, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023453754372894766, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023453754372894766, "signal/frontier_ece_reward/centered_abs_mean": 0.0460667222738266, "signal/frontier_ece_reward/group_bin_occupancy": 0.6913194444444444, "signal/frontier_ece_reward/group_std_mean": 0.058010222762823103, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0046066722832620146, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0046066722832620146, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3246371805667877, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7493055555555557, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3948457419872284, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.032463718205690384, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032463718205690384, "step": 100 }, { "epoch": 0.23999700003749952, "eval_calibration/aurc": 0.1232624973726325, "eval_calibration/batch_distribution_entropy": 0.8948085890484774, "eval_calibration/batch_entropy_100bins": 0.7040956292401587, "eval_calibration/batch_entropy_10bins": 0.8948085890484774, "eval_calibration/batch_entropy_50bins": 0.7832705217175612, "eval_calibration/batch_uniqueness": 0.8917035206382241, "eval_calibration/buffer_distribution_entropy": 0.922497509059878, "eval_calibration/buffer_entropy_100bins": 0.912813983946152, "eval_calibration/buffer_entropy_10bins": 0.922497509059878, "eval_calibration/buffer_entropy_50bins": 0.9306076485338187, "eval_calibration/confidence_entropy": 0.4828748758253563, "eval_calibration/coverage@0%": 0.24378360215053765, "eval_calibration/coverage@1%": 0.24378360215053765, "eval_calibration/coverage@10%": 0.6001344086021505, "eval_calibration/coverage@15%": 0.7688172043010754, "eval_calibration/coverage@20%": 0.8741599462365591, "eval_calibration/coverage@25%": 0.9321236559139785, "eval_calibration/coverage@30%": 0.9895833333333334, "eval_calibration/coverage@5%": 0.2921706989247312, "eval_calibration/ece": 0.22508465745817566, "eval_calibration/mean_confidence": 0.6127040160956362, "eval_calibration/prompt_uniqueness": 0.8917035206382241, "eval_completions/clipped_ratio": 0.010416666666666649, "eval_completions/max_length": 2943.5, "eval_completions/max_terminated_length": 2943.5, "eval_completions/mean_length": 961.2862955729166, "eval_completions/mean_terminated_length": 971.3759256998698, "eval_completions/min_length": 71.0, "eval_completions/min_terminated_length": 352.5, "eval_loss": 0.0, "eval_num_tokens": 232945873.0, "eval_reward": 0.9052010973294576, "eval_reward_std": 0.23245403667291006, "eval_rewards/accuracy_reward": 0.6796875, "eval_rewards/brier_reward": 0.7887685497601827, "eval_rewards/confidence_uniqueness_reward": 0.8840933938821157, "eval_rewards/format_reward": 0.9878472089767456, "eval_rewards/frontier_aurc_reward": -0.0014447161132314552, "eval_rewards/frontier_coverage_0": 0.0102703048226734, "eval_rewards/frontier_coverage_1": 0.0102703048226734, "eval_rewards/frontier_coverage_10": 0.0102703048226734, "eval_rewards/frontier_coverage_15": 0.0102703048226734, "eval_rewards/frontier_coverage_20": 0.0102703048226734, "eval_rewards/frontier_coverage_25": 0.0102703048226734, "eval_rewards/frontier_coverage_5": 0.0102703048226734, "eval_rewards/frontier_ece_reward": 0.020516497393449146, "eval_rewards/frontier_entropy_batch_reward": -0.9878472089767456, "eval_runtime": 211.7743, "eval_samples_per_second": 4.722, "eval_signal/accuracy_reward/centered_abs_mean": 0.4166124115387599, "eval_signal/accuracy_reward/group_bin_occupancy": 0.25, "eval_signal/accuracy_reward/group_std_mean": 0.4618180791536967, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.20830620576937994, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.20830620576937994, "eval_signal/advantage_abs_mean": 0.19841948399941126, "eval_signal/advantage_pre_scale_abs_mean": 0.19841948399941126, "eval_signal/advantage_pre_scale_std": 0.23134330163399378, "eval_signal/advantage_std": 0.23134330163399378, "eval_signal/brier_reward/centered_abs_mean": 0.19762666523456573, "eval_signal/brier_reward/group_bin_occupancy": 0.8819444444444445, "eval_signal/brier_reward/group_std_mean": 0.25263360391060513, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01976266720642646, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.01976266720642646, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.05467540336151918, "eval_signal/confidence_uniqueness_reward/group_bin_occupancy": 0.3993055555555556, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.09214186668395996, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005467540351673961, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005467540351673961, "eval_signal/format_reward/centered_abs_mean": 0.023328992693374555, "eval_signal/format_reward/group_bin_occupancy": 0.16666666666666666, "eval_signal/format_reward/group_std_mean": 0.06276767669866483, "eval_signal/format_reward/group_zero_std_frac": 0.6666666915019354, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.011664496346687278, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.011664496346687278, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.002142787619959563, "eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.6770833333333334, "eval_signal/frontier_aurc_reward/group_std_mean": 0.0038993366761133075, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.6784844218733877e-05, "eval_signal/frontier_aurc_reward/weight": 0.012500000186264515, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.6784844218733877e-05, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.26335882892211276, "eval_signal/frontier_coverage_0/group_bin_occupancy": 0.9409722222222223, "eval_signal/frontier_coverage_0/group_std_mean": 0.3692873766024907, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003291985446897646, "eval_signal/frontier_coverage_0/weight": 0.012500000186264515, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003291985446897646, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.26335882892211276, "eval_signal/frontier_coverage_1/group_bin_occupancy": 0.9409722222222223, "eval_signal/frontier_coverage_1/group_std_mean": 0.3692873766024907, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003291985446897646, "eval_signal/frontier_coverage_1/weight": 0.012500000186264515, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003291985446897646, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.26335882892211276, "eval_signal/frontier_coverage_10/group_bin_occupancy": 0.9409722222222223, "eval_signal/frontier_coverage_10/group_std_mean": 0.3692873766024907, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003291985446897646, "eval_signal/frontier_coverage_10/weight": 0.012500000186264515, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003291985446897646, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.26335882892211276, "eval_signal/frontier_coverage_15/group_bin_occupancy": 0.9409722222222223, "eval_signal/frontier_coverage_15/group_std_mean": 0.3692873766024907, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003291985446897646, "eval_signal/frontier_coverage_15/weight": 0.012500000186264515, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003291985446897646, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.26335882892211276, "eval_signal/frontier_coverage_20/group_bin_occupancy": 0.9409722222222223, "eval_signal/frontier_coverage_20/group_std_mean": 0.3692873766024907, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003291985446897646, "eval_signal/frontier_coverage_20/weight": 0.012500000186264515, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003291985446897646, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.26335882892211276, "eval_signal/frontier_coverage_25/group_bin_occupancy": 0.9409722222222223, "eval_signal/frontier_coverage_25/group_std_mean": 0.3692873766024907, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003291985446897646, "eval_signal/frontier_coverage_25/weight": 0.012500000186264515, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003291985446897646, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.26335882892211276, "eval_signal/frontier_coverage_5/group_bin_occupancy": 0.9409722222222223, "eval_signal/frontier_coverage_5/group_std_mean": 0.3692873766024907, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003291985446897646, "eval_signal/frontier_coverage_5/weight": 0.012500000186264515, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003291985446897646, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.05410987697541714, "eval_signal/frontier_ece_reward/group_bin_occupancy": 0.9131944444444445, "eval_signal/frontier_ece_reward/group_std_mean": 0.06782141576210658, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005410987806196014, "eval_signal/frontier_ece_reward/weight": 0.10000000149011612, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005410987806196014, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.023328992693374555, "eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.16666666666666666, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.06276767669866483, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.6666666915019354, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.002332899389633288, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.002332899389633288, "eval_steps_per_second": 0.028, "step": 100 }, { "calibration/aurc": 0.2851160668444922, "calibration/batch_distribution_entropy": 0.9671729214309087, "calibration/batch_entropy_100bins": 0.9543574644103542, "calibration/batch_entropy_10bins": 0.9671729214309087, "calibration/batch_entropy_50bins": 0.9636134029781351, "calibration/batch_uniqueness": 0.9495695699430952, "calibration/buffer_distribution_entropy": 0.924664049380876, "calibration/buffer_entropy_100bins": 0.915730900885403, "calibration/buffer_entropy_10bins": 0.924664049380876, "calibration/buffer_entropy_50bins": 0.9328724668779979, "calibration/confidence_entropy": 0.49600341279363536, "calibration/coverage@0%": 0.060966568007277976, "calibration/coverage@1%": 0.08668835278418087, "calibration/coverage@10%": 0.13970672548759294, "calibration/coverage@15%": 0.26881728379574177, "calibration/coverage@20%": 0.3661078923001498, "calibration/coverage@25%": 0.4303547684019874, "calibration/coverage@30%": 0.5357389720911219, "calibration/coverage@5%": 0.12290882522512576, "calibration/ece": 0.1526015826672329, "calibration/mean_confidence": 0.5764491669243426, "calibration/prompt_uniqueness": 0.861689626065415, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.020833333333333325, "completions/max_length": 3851.2, "completions/max_terminated_length": 3851.2, "completions/mean_length": 966.7627685546875, "completions/mean_terminated_length": 987.4223266601563, "completions/min_length": 0.0, "completions/min_terminated_length": 296.6, "epoch": 0.2519968500393745, "grad_norm": 0.0003603503864724189, "learning_rate": 3.1024096385542172e-06, "loss": -0.0174, "num_tokens": 247159844.0, "reward": 0.9762218475341797, "reward_std": 0.13867290019989015, "rewards/accuracy_reward": 0.6809895873069763, "rewards/brier_reward": 0.7864728808403015, "rewards/confidence_uniqueness_reward": 0.929021692276001, "rewards/format_reward": 0.9790798664093018, "rewards/frontier_aurc_reward": -0.0012768813758157194, "rewards/frontier_coverage_0": 0.010393311083316804, "rewards/frontier_coverage_1": 0.010393311083316804, "rewards/frontier_coverage_10": 0.010393311083316804, "rewards/frontier_coverage_15": 0.010393311083316804, "rewards/frontier_coverage_20": 0.010393311083316804, "rewards/frontier_coverage_25": 0.010393311083316804, "rewards/frontier_coverage_5": 0.010393311083316804, "rewards/frontier_ece_reward": 0.020966623350977896, "rewards/frontier_entropy_batch_reward": -0.283524689078331, "signal/accuracy_reward/centered_abs_mean": 0.1553656652569771, "signal/accuracy_reward/group_bin_occupancy": 0.20173611111111106, "signal/accuracy_reward/group_std_mean": 0.210291787981987, "signal/accuracy_reward/group_zero_std_frac": 0.38611111640930174, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07768283262848855, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07768283262848855, "signal/advantage_abs_mean": 0.10010195821523667, "signal/advantage_pre_scale_abs_mean": 0.10010195821523667, "signal/advantage_pre_scale_std": 0.16518112421035766, "signal/advantage_std": 0.16518112421035766, "signal/brier_reward/centered_abs_mean": 0.14650782942771912, "signal/brier_reward/group_bin_occupancy": 0.825, "signal/brier_reward/group_std_mean": 0.18895745873451233, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014650783315300942, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014650783315300942, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04202488660812378, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8017361111111111, "signal/confidence_uniqueness_reward/group_std_mean": 0.0678424745798111, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004202488483861089, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004202488483861089, "signal/format_reward/centered_abs_mean": 0.03167860247194767, "signal/format_reward/group_bin_occupancy": 0.15208333333333332, "signal/format_reward/group_std_mean": 0.05567045882344246, "signal/format_reward/group_zero_std_frac": 0.7833333611488342, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.015839301235973834, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.015839301235973834, "signal/frontier_aurc_reward/centered_abs_mean": 0.0014219350181519986, "signal/frontier_aurc_reward/group_bin_occupancy": 0.671875, "signal/frontier_aurc_reward/group_std_mean": 0.0023063634755089877, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.7774187836039345e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.7774187836039345e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.18595612347126006, "signal/frontier_coverage_0/group_bin_occupancy": 0.8170138888888889, "signal/frontier_coverage_0/group_std_mean": 0.24536578357219696, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0023244516225531696, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023244516225531696, "signal/frontier_coverage_1/centered_abs_mean": 0.18595612347126006, "signal/frontier_coverage_1/group_bin_occupancy": 0.8170138888888889, "signal/frontier_coverage_1/group_std_mean": 0.24536578357219696, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023244516225531696, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023244516225531696, "signal/frontier_coverage_10/centered_abs_mean": 0.18595612347126006, "signal/frontier_coverage_10/group_bin_occupancy": 0.8170138888888889, "signal/frontier_coverage_10/group_std_mean": 0.24536578357219696, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023244516225531696, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023244516225531696, "signal/frontier_coverage_15/centered_abs_mean": 0.18595612347126006, "signal/frontier_coverage_15/group_bin_occupancy": 0.8170138888888889, "signal/frontier_coverage_15/group_std_mean": 0.24536578357219696, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023244516225531696, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023244516225531696, "signal/frontier_coverage_20/centered_abs_mean": 0.18595612347126006, "signal/frontier_coverage_20/group_bin_occupancy": 0.8170138888888889, "signal/frontier_coverage_20/group_std_mean": 0.24536578357219696, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0023244516225531696, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0023244516225531696, "signal/frontier_coverage_25/centered_abs_mean": 0.18595612347126006, "signal/frontier_coverage_25/group_bin_occupancy": 0.8170138888888889, "signal/frontier_coverage_25/group_std_mean": 0.24536578357219696, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0023244516225531696, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0023244516225531696, "signal/frontier_coverage_5/centered_abs_mean": 0.18595612347126006, "signal/frontier_coverage_5/group_bin_occupancy": 0.8170138888888889, "signal/frontier_coverage_5/group_std_mean": 0.24536578357219696, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023244516225531696, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023244516225531696, "signal/frontier_ece_reward/centered_abs_mean": 0.04364292547106743, "signal/frontier_ece_reward/group_bin_occupancy": 0.6725694444444444, "signal/frontier_ece_reward/group_std_mean": 0.05495603755116463, "signal/frontier_ece_reward/group_zero_std_frac": 0.002777777798473835, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004364292602986097, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004364292602986097, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32798747420310975, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7527777777777777, "signal/frontier_entropy_batch_reward/group_std_mean": 0.39967008829116824, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03279874660074711, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03279874660074711, "step": 105 }, { "calibration/aurc": 0.1483170665199357, "calibration/batch_distribution_entropy": 0.9387725486299472, "calibration/batch_entropy_100bins": 0.9385953694946148, "calibration/batch_entropy_10bins": 0.9387725486299472, "calibration/batch_entropy_50bins": 0.947461516265483, "calibration/batch_uniqueness": 0.9434469786987381, "calibration/buffer_distribution_entropy": 0.9278483560380801, "calibration/buffer_entropy_100bins": 0.9212316856289691, "calibration/buffer_entropy_10bins": 0.9278483560380801, "calibration/buffer_entropy_50bins": 0.9368451531102391, "calibration/confidence_entropy": 0.4558293825239885, "calibration/coverage@0%": 0.059290959926182385, "calibration/coverage@1%": 0.059290959926182385, "calibration/coverage@10%": 0.36697772902705544, "calibration/coverage@15%": 0.5974388122980647, "calibration/coverage@20%": 0.7156884661286804, "calibration/coverage@25%": 0.8580991124280738, "calibration/coverage@30%": 0.956633186340756, "calibration/coverage@5%": 0.245650904176041, "calibration/ece": 0.13348184934245935, "calibration/mean_confidence": 0.607258804172381, "calibration/prompt_uniqueness": 0.8473293014447923, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015538194444444441, "completions/max_length": 3861.8, "completions/max_terminated_length": 3861.8, "completions/mean_length": 1001.0733642578125, "completions/mean_terminated_length": 1016.8860473632812, "completions/min_length": 0.0, "completions/min_terminated_length": 303.6, "epoch": 0.2639967000412495, "grad_norm": 0.00029901484958827496, "learning_rate": 2.9518072289156627e-06, "loss": -0.0131, "num_tokens": 261800657.0, "reward": 0.9921531319618225, "reward_std": 0.13675991892814637, "rewards/accuracy_reward": 0.7078125, "rewards/brier_reward": 0.7964969754219056, "rewards/confidence_uniqueness_reward": 0.9329366683959961, "rewards/format_reward": 0.9843749880790711, "rewards/frontier_aurc_reward": -0.0011116554378531873, "rewards/frontier_coverage_0": 0.0021981429308652878, "rewards/frontier_coverage_1": 0.0021981429308652878, "rewards/frontier_coverage_10": 0.0021981429308652878, "rewards/frontier_coverage_15": 0.0021981429308652878, "rewards/frontier_coverage_20": 0.0021981429308652878, "rewards/frontier_coverage_25": 0.0021981429308652878, "rewards/frontier_coverage_5": 0.0021981429308652878, "rewards/frontier_ece_reward": 0.02157861925661564, "rewards/frontier_entropy_batch_reward": -0.2922031283378601, "signal/accuracy_reward/centered_abs_mean": 0.16134982705116271, "signal/accuracy_reward/group_bin_occupancy": 0.20451388888888888, "signal/accuracy_reward/group_std_mean": 0.21803545951843262, "signal/accuracy_reward/group_zero_std_frac": 0.3638888895511627, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08067491352558136, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08067491352558136, "signal/advantage_abs_mean": 0.09815683215856552, "signal/advantage_pre_scale_abs_mean": 0.09815683215856552, "signal/advantage_pre_scale_std": 0.16332031190395355, "signal/advantage_std": 0.16332031190395355, "signal/brier_reward/centered_abs_mean": 0.14194732010364533, "signal/brier_reward/group_bin_occupancy": 0.80625, "signal/brier_reward/group_std_mean": 0.18631795942783355, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014194732159376144, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014194732159376144, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03753194957971573, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8, "signal/confidence_uniqueness_reward/group_std_mean": 0.0628928780555725, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0037531950045377018, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0037531950045377018, "signal/format_reward/centered_abs_mean": 0.02554253451526165, "signal/format_reward/group_bin_occupancy": 0.15034722222222224, "signal/format_reward/group_std_mean": 0.04873799011111259, "signal/format_reward/group_zero_std_frac": 0.7972222328186035, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.012771267257630826, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.012771267257630826, "signal/frontier_aurc_reward/centered_abs_mean": 0.0013877948513254523, "signal/frontier_aurc_reward/group_bin_occupancy": 0.685763888888889, "signal/frontier_aurc_reward/group_std_mean": 0.0022338322829455136, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.7347435641568153e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.7347435641568153e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.18613446354866028, "signal/frontier_coverage_0/group_bin_occupancy": 0.8159722222222221, "signal/frontier_coverage_0/group_std_mean": 0.24784242510795593, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0023266808595508335, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023266808595508335, "signal/frontier_coverage_1/centered_abs_mean": 0.18613446354866028, "signal/frontier_coverage_1/group_bin_occupancy": 0.8159722222222221, "signal/frontier_coverage_1/group_std_mean": 0.24784242510795593, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023266808595508335, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023266808595508335, "signal/frontier_coverage_10/centered_abs_mean": 0.18613446354866028, "signal/frontier_coverage_10/group_bin_occupancy": 0.8159722222222221, "signal/frontier_coverage_10/group_std_mean": 0.24784242510795593, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023266808595508335, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023266808595508335, "signal/frontier_coverage_15/centered_abs_mean": 0.18613446354866028, "signal/frontier_coverage_15/group_bin_occupancy": 0.8159722222222221, "signal/frontier_coverage_15/group_std_mean": 0.24784242510795593, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023266808595508335, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023266808595508335, "signal/frontier_coverage_20/centered_abs_mean": 0.18613446354866028, "signal/frontier_coverage_20/group_bin_occupancy": 0.8159722222222221, "signal/frontier_coverage_20/group_std_mean": 0.24784242510795593, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0023266808595508335, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0023266808595508335, "signal/frontier_coverage_25/centered_abs_mean": 0.18613446354866028, "signal/frontier_coverage_25/group_bin_occupancy": 0.8159722222222221, "signal/frontier_coverage_25/group_std_mean": 0.24784242510795593, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0023266808595508335, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0023266808595508335, "signal/frontier_coverage_5/centered_abs_mean": 0.18613446354866028, "signal/frontier_coverage_5/group_bin_occupancy": 0.8159722222222221, "signal/frontier_coverage_5/group_std_mean": 0.24784242510795593, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023266808595508335, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023266808595508335, "signal/frontier_ece_reward/centered_abs_mean": 0.04411152824759483, "signal/frontier_ece_reward/group_bin_occupancy": 0.671875, "signal/frontier_ece_reward/group_std_mean": 0.05458846464753151, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004411152843385935, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004411152843385935, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32641210556030276, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7527777777777778, "signal/frontier_entropy_batch_reward/group_std_mean": 0.39622201919555666, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03264121115207672, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03264121115207672, "step": 110 }, { "calibration/aurc": 0.28311624676865715, "calibration/batch_distribution_entropy": 0.9819046158374496, "calibration/batch_entropy_100bins": 0.9621165215133629, "calibration/batch_entropy_10bins": 0.9819046158374496, "calibration/batch_entropy_50bins": 0.9711147626556473, "calibration/batch_uniqueness": 0.9519674283199551, "calibration/buffer_distribution_entropy": 0.9312139349056319, "calibration/buffer_entropy_100bins": 0.9261870789929871, "calibration/buffer_entropy_10bins": 0.9312139349056319, "calibration/buffer_entropy_50bins": 0.9405647963056272, "calibration/confidence_entropy": 0.5083362254533048, "calibration/coverage@0%": 0.008015547191444707, "calibration/coverage@1%": 0.008015547191444707, "calibration/coverage@10%": 0.08324067443727931, "calibration/coverage@15%": 0.3029031354053958, "calibration/coverage@20%": 0.4227714752431936, "calibration/coverage@25%": 0.5560391535985508, "calibration/coverage@30%": 0.6467141986346506, "calibration/coverage@5%": 0.008015547191444707, "calibration/ece": 0.20618875522590968, "calibration/mean_confidence": 0.5348565191388408, "calibration/prompt_uniqueness": 0.8555982164395447, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.023524305555555534, "completions/max_length": 3952.4, "completions/max_terminated_length": 3952.4, "completions/mean_length": 1017.3613525390625, "completions/mean_terminated_length": 1042.0224365234376, "completions/min_length": 0.0, "completions/min_terminated_length": 300.4, "epoch": 0.27599655004312446, "grad_norm": 0.00028006560751236975, "learning_rate": 2.8012048192771087e-06, "loss": -0.0197, "num_tokens": 276599860.0, "reward": 0.9687199473381043, "reward_std": 0.1485589861869812, "rewards/accuracy_reward": 0.664843738079071, "rewards/brier_reward": 0.7744507193565369, "rewards/confidence_uniqueness_reward": 0.9282928586006165, "rewards/format_reward": 0.9762152791023254, "rewards/frontier_aurc_reward": -0.0013954649912193418, "rewards/frontier_coverage_0": 0.012234875041758641, "rewards/frontier_coverage_1": 0.012234875041758641, "rewards/frontier_coverage_10": 0.012234875041758641, "rewards/frontier_coverage_15": 0.012234875041758641, "rewards/frontier_coverage_20": 0.012234875041758641, "rewards/frontier_coverage_25": 0.012234875041758641, "rewards/frontier_coverage_5": 0.012234875041758641, "rewards/frontier_ece_reward": 0.015254579298198224, "rewards/frontier_entropy_batch_reward": -0.2466247111558914, "signal/accuracy_reward/centered_abs_mean": 0.16998155415058136, "signal/accuracy_reward/group_bin_occupancy": 0.2041666666666667, "signal/accuracy_reward/group_std_mean": 0.22179057002067565, "signal/accuracy_reward/group_zero_std_frac": 0.3666666686534882, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08499077707529068, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08499077707529068, "signal/advantage_abs_mean": 0.10761507451534272, "signal/advantage_pre_scale_abs_mean": 0.10761507451534272, "signal/advantage_pre_scale_std": 0.17629152834415435, "signal/advantage_std": 0.17629152834415435, "signal/brier_reward/centered_abs_mean": 0.1553166389465332, "signal/brier_reward/group_bin_occupancy": 0.8298611111111113, "signal/brier_reward/group_std_mean": 0.20035399496555328, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015531663782894611, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.015531663782894611, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04595714658498764, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.7524305555555556, "signal/confidence_uniqueness_reward/group_std_mean": 0.07812547087669372, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004595714528113604, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004595714528113604, "signal/format_reward/centered_abs_mean": 0.03736979141831398, "signal/format_reward/group_bin_occupancy": 0.15868055555555555, "signal/format_reward/group_std_mean": 0.06836798191070556, "signal/format_reward/group_zero_std_frac": 0.7305555582046509, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01868489570915699, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01868489570915699, "signal/frontier_aurc_reward/centered_abs_mean": 0.0014289145823568107, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6930555555555555, "signal/frontier_aurc_reward/group_std_mean": 0.002272111759521067, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.7861432934296317e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.7861432934296317e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.19874320030212403, "signal/frontier_coverage_0/group_bin_occupancy": 0.8319444444444445, "signal/frontier_coverage_0/group_std_mean": 0.259122833609581, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0024842898827046158, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0024842898827046158, "signal/frontier_coverage_1/centered_abs_mean": 0.19874320030212403, "signal/frontier_coverage_1/group_bin_occupancy": 0.8319444444444445, "signal/frontier_coverage_1/group_std_mean": 0.259122833609581, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024842898827046158, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024842898827046158, "signal/frontier_coverage_10/centered_abs_mean": 0.19874320030212403, "signal/frontier_coverage_10/group_bin_occupancy": 0.8319444444444445, "signal/frontier_coverage_10/group_std_mean": 0.259122833609581, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024842898827046158, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024842898827046158, "signal/frontier_coverage_15/centered_abs_mean": 0.19874320030212403, "signal/frontier_coverage_15/group_bin_occupancy": 0.8319444444444445, "signal/frontier_coverage_15/group_std_mean": 0.259122833609581, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024842898827046158, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024842898827046158, "signal/frontier_coverage_20/centered_abs_mean": 0.19874320030212403, "signal/frontier_coverage_20/group_bin_occupancy": 0.8319444444444445, "signal/frontier_coverage_20/group_std_mean": 0.259122833609581, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0024842898827046158, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0024842898827046158, "signal/frontier_coverage_25/centered_abs_mean": 0.19874320030212403, "signal/frontier_coverage_25/group_bin_occupancy": 0.8319444444444445, "signal/frontier_coverage_25/group_std_mean": 0.259122833609581, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0024842898827046158, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0024842898827046158, "signal/frontier_coverage_5/centered_abs_mean": 0.19874320030212403, "signal/frontier_coverage_5/group_bin_occupancy": 0.8319444444444445, "signal/frontier_coverage_5/group_std_mean": 0.259122833609581, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024842898827046158, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024842898827046158, "signal/frontier_ece_reward/centered_abs_mean": 0.040270973742008206, "signal/frontier_ece_reward/group_bin_occupancy": 0.6878472222222223, "signal/frontier_ece_reward/group_std_mean": 0.05081784054636955, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004027097299695015, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004027097299695015, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31121625900268557, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7447916666666666, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3846454739570618, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.031121626123785974, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.031121626123785974, "step": 115 }, { "calibration/aurc": 0.2595476812471177, "calibration/batch_distribution_entropy": 0.962110799023046, "calibration/batch_entropy_100bins": 0.955106701333807, "calibration/batch_entropy_10bins": 0.962110799023046, "calibration/batch_entropy_50bins": 0.9644720183778576, "calibration/batch_uniqueness": 0.9499307501739891, "calibration/buffer_distribution_entropy": 0.9355022742156173, "calibration/buffer_entropy_100bins": 0.9312474850497697, "calibration/buffer_entropy_10bins": 0.9355022742156173, "calibration/buffer_entropy_50bins": 0.9447033893558565, "calibration/confidence_entropy": 0.48475043392478445, "calibration/coverage@0%": 0.011083498287003456, "calibration/coverage@1%": 0.011083498287003456, "calibration/coverage@10%": 0.3635380791890612, "calibration/coverage@15%": 0.4410849483790093, "calibration/coverage@20%": 0.5160072720239822, "calibration/coverage@25%": 0.5471758299142111, "calibration/coverage@30%": 0.5812770248801581, "calibration/coverage@5%": 0.10752677005481348, "calibration/ece": 0.1552948007514812, "calibration/mean_confidence": 0.5953624452801337, "calibration/prompt_uniqueness": 0.8642060771148008, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01796875, "completions/max_length": 3848.0, "completions/max_terminated_length": 3848.0, "completions/mean_length": 1007.0320190429687, "completions/mean_terminated_length": 1025.3716430664062, "completions/min_length": 0.0, "completions/min_terminated_length": 343.6, "epoch": 0.28799640004499943, "grad_norm": 0.00025856465799733996, "learning_rate": 2.6506024096385547e-06, "loss": -0.0155, "num_tokens": 291282725.0, "reward": 0.978645408153534, "reward_std": 0.13838702142238618, "rewards/accuracy_reward": 0.6781249880790711, "rewards/brier_reward": 0.7952387213706971, "rewards/confidence_uniqueness_reward": 0.9324679374694824, "rewards/format_reward": 0.98203125, "rewards/frontier_aurc_reward": -0.0012951105483807624, "rewards/frontier_coverage_0": 0.018445078120566904, "rewards/frontier_coverage_1": 0.018445078120566904, "rewards/frontier_coverage_10": 0.018445078120566904, "rewards/frontier_coverage_15": 0.018445078120566904, "rewards/frontier_coverage_20": 0.018445078120566904, "rewards/frontier_coverage_25": 0.018445078120566904, "rewards/frontier_coverage_5": 0.018445078120566904, "rewards/frontier_ece_reward": 0.018771170079708098, "rewards/frontier_entropy_batch_reward": -0.27678276896476744, "signal/accuracy_reward/centered_abs_mean": 0.16322699785232545, "signal/accuracy_reward/group_bin_occupancy": 0.20208333333333334, "signal/accuracy_reward/group_std_mean": 0.21511903703212737, "signal/accuracy_reward/group_zero_std_frac": 0.3833333432674408, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08161349892616272, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08161349892616272, "signal/advantage_abs_mean": 0.10217539519071579, "signal/advantage_pre_scale_abs_mean": 0.10217539519071579, "signal/advantage_pre_scale_std": 0.16570760905742646, "signal/advantage_std": 0.16570760905742646, "signal/brier_reward/centered_abs_mean": 0.14061089158058165, "signal/brier_reward/group_bin_occupancy": 0.8267361111111111, "signal/brier_reward/group_std_mean": 0.18166620433330535, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014061089418828488, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014061089418828488, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.038473252952098844, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8069444444444445, "signal/confidence_uniqueness_reward/group_std_mean": 0.062386732548475266, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0038473252672702072, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0038473252672702072, "signal/format_reward/centered_abs_mean": 0.02782660610973835, "signal/format_reward/group_bin_occupancy": 0.14930555555555555, "signal/format_reward/group_std_mean": 0.04968899041414261, "signal/format_reward/group_zero_std_frac": 0.8055555582046509, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.013913303054869175, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.013913303054869175, "signal/frontier_aurc_reward/centered_abs_mean": 0.0014574224362149835, "signal/frontier_aurc_reward/group_bin_occupancy": 0.690625, "signal/frontier_aurc_reward/group_std_mean": 0.0023057571612298488, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.8217780234408564e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.8217780234408564e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.1772557020187378, "signal/frontier_coverage_0/group_bin_occupancy": 0.8270833333333332, "signal/frontier_coverage_0/group_std_mean": 0.2352249562740326, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0022156964056193828, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022156964056193828, "signal/frontier_coverage_1/centered_abs_mean": 0.1772557020187378, "signal/frontier_coverage_1/group_bin_occupancy": 0.8270833333333332, "signal/frontier_coverage_1/group_std_mean": 0.2352249562740326, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022156964056193828, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022156964056193828, "signal/frontier_coverage_10/centered_abs_mean": 0.1772557020187378, "signal/frontier_coverage_10/group_bin_occupancy": 0.8270833333333332, "signal/frontier_coverage_10/group_std_mean": 0.2352249562740326, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022156964056193828, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022156964056193828, "signal/frontier_coverage_15/centered_abs_mean": 0.1772557020187378, "signal/frontier_coverage_15/group_bin_occupancy": 0.8270833333333332, "signal/frontier_coverage_15/group_std_mean": 0.2352249562740326, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0022156964056193828, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022156964056193828, "signal/frontier_coverage_20/centered_abs_mean": 0.1772557020187378, "signal/frontier_coverage_20/group_bin_occupancy": 0.8270833333333332, "signal/frontier_coverage_20/group_std_mean": 0.2352249562740326, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0022156964056193828, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0022156964056193828, "signal/frontier_coverage_25/centered_abs_mean": 0.1772557020187378, "signal/frontier_coverage_25/group_bin_occupancy": 0.8270833333333332, "signal/frontier_coverage_25/group_std_mean": 0.2352249562740326, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0022156964056193828, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022156964056193828, "signal/frontier_coverage_5/centered_abs_mean": 0.1772557020187378, "signal/frontier_coverage_5/group_bin_occupancy": 0.8270833333333332, "signal/frontier_coverage_5/group_std_mean": 0.2352249562740326, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0022156964056193828, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022156964056193828, "signal/frontier_ece_reward/centered_abs_mean": 0.03890540599822998, "signal/frontier_ece_reward/group_bin_occupancy": 0.6711805555555557, "signal/frontier_ece_reward/group_std_mean": 0.048710108548402783, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0038905406836420298, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0038905406836420298, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3223755657672882, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7454861111111111, "signal/frontier_entropy_batch_reward/group_std_mean": 0.39308597445487975, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03223755843937397, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03223755843937397, "step": 120 }, { "calibration/aurc": 0.15175947975368032, "calibration/batch_distribution_entropy": 0.9376645983193402, "calibration/batch_entropy_100bins": 0.9415076330475506, "calibration/batch_entropy_10bins": 0.9376645983193402, "calibration/batch_entropy_50bins": 0.9499838957016375, "calibration/batch_uniqueness": 0.9455843743770942, "calibration/buffer_distribution_entropy": 0.9375868310371841, "calibration/buffer_entropy_100bins": 0.9349363755430339, "calibration/buffer_entropy_10bins": 0.9375868310371841, "calibration/buffer_entropy_50bins": 0.9473378210710391, "calibration/confidence_entropy": 0.5195516841707571, "calibration/coverage@0%": 0.035550694075693494, "calibration/coverage@1%": 0.035550694075693494, "calibration/coverage@10%": 0.38478661557597077, "calibration/coverage@15%": 0.5340862971466895, "calibration/coverage@20%": 0.6252700952489321, "calibration/coverage@25%": 0.8642236051281177, "calibration/coverage@30%": 0.9356544982329478, "calibration/coverage@5%": 0.24815686443079188, "calibration/ece": 0.13946983254300122, "calibration/mean_confidence": 0.6065808418851792, "calibration/prompt_uniqueness": 0.8611937174457729, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016579861111111115, "completions/max_length": 4033.4, "completions/max_terminated_length": 4033.4, "completions/mean_length": 1023.8368286132812, "completions/mean_terminated_length": 1041.2441528320312, "completions/min_length": 0.0, "completions/min_terminated_length": 312.0, "epoch": 0.2999962500468744, "grad_norm": 0.0002858802326954901, "learning_rate": 2.5e-06, "loss": -0.0149, "num_tokens": 306194989.0, "reward": 0.9803131580352783, "reward_std": 0.13643481433391572, "rewards/accuracy_reward": 0.6823784708976746, "rewards/brier_reward": 0.805714464187622, "rewards/confidence_uniqueness_reward": 0.9327924966812133, "rewards/format_reward": 0.9832465291023255, "rewards/frontier_aurc_reward": -0.0010212866007350385, "rewards/frontier_coverage_0": 0.016784844733774663, "rewards/frontier_coverage_1": 0.016784844733774663, "rewards/frontier_coverage_10": 0.016784844733774663, "rewards/frontier_coverage_15": 0.016784844733774663, "rewards/frontier_coverage_20": 0.016784844733774663, "rewards/frontier_coverage_25": 0.016784844733774663, "rewards/frontier_coverage_5": 0.016784844733774663, "rewards/frontier_ece_reward": 0.016746819019317627, "rewards/frontier_entropy_batch_reward": -0.29480605721473696, "signal/accuracy_reward/centered_abs_mean": 0.15901150405406952, "signal/accuracy_reward/group_bin_occupancy": 0.19895833333333335, "signal/accuracy_reward/group_std_mean": 0.20940764546394347, "signal/accuracy_reward/group_zero_std_frac": 0.40833333134651184, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07950575202703476, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07950575202703476, "signal/advantage_abs_mean": 0.10095046162605285, "signal/advantage_pre_scale_abs_mean": 0.10095046162605285, "signal/advantage_pre_scale_std": 0.16176269948482513, "signal/advantage_std": 0.16176269948482513, "signal/brier_reward/centered_abs_mean": 0.1315429389476776, "signal/brier_reward/group_bin_occupancy": 0.8329861111111111, "signal/brier_reward/group_std_mean": 0.17215375006198883, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013154294155538083, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013154294155538083, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03707269802689552, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.815625, "signal/confidence_uniqueness_reward/group_std_mean": 0.06010228767991066, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003707269812002778, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003707269812002778, "signal/format_reward/centered_abs_mean": 0.02627495713531971, "signal/format_reward/group_bin_occupancy": 0.14861111111111114, "signal/format_reward/group_std_mean": 0.047227922827005386, "signal/format_reward/group_zero_std_frac": 0.8111111044883728, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.013137478567659854, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.013137478567659854, "signal/frontier_aurc_reward/centered_abs_mean": 0.0011868951609358192, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6777777777777778, "signal/frontier_aurc_reward/group_std_mean": 0.0019993403926491736, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.4836190712230745e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.4836190712230745e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.1744101345539093, "signal/frontier_coverage_0/group_bin_occupancy": 0.8472222222222221, "signal/frontier_coverage_0/group_std_mean": 0.22863954305648804, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0021801266819238664, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021801266819238664, "signal/frontier_coverage_1/centered_abs_mean": 0.1744101345539093, "signal/frontier_coverage_1/group_bin_occupancy": 0.8472222222222221, "signal/frontier_coverage_1/group_std_mean": 0.22863954305648804, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0021801266819238664, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021801266819238664, "signal/frontier_coverage_10/centered_abs_mean": 0.1744101345539093, "signal/frontier_coverage_10/group_bin_occupancy": 0.8472222222222221, "signal/frontier_coverage_10/group_std_mean": 0.22863954305648804, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0021801266819238664, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0021801266819238664, "signal/frontier_coverage_15/centered_abs_mean": 0.1744101345539093, "signal/frontier_coverage_15/group_bin_occupancy": 0.8472222222222221, "signal/frontier_coverage_15/group_std_mean": 0.22863954305648804, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0021801266819238664, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0021801266819238664, "signal/frontier_coverage_20/centered_abs_mean": 0.1744101345539093, "signal/frontier_coverage_20/group_bin_occupancy": 0.8472222222222221, "signal/frontier_coverage_20/group_std_mean": 0.22863954305648804, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0021801266819238664, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0021801266819238664, "signal/frontier_coverage_25/centered_abs_mean": 0.1744101345539093, "signal/frontier_coverage_25/group_bin_occupancy": 0.8472222222222221, "signal/frontier_coverage_25/group_std_mean": 0.22863954305648804, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0021801266819238664, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0021801266819238664, "signal/frontier_coverage_5/centered_abs_mean": 0.1744101345539093, "signal/frontier_coverage_5/group_bin_occupancy": 0.8472222222222221, "signal/frontier_coverage_5/group_std_mean": 0.22863954305648804, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0021801266819238664, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021801266819238664, "signal/frontier_ece_reward/centered_abs_mean": 0.0351276122033596, "signal/frontier_ece_reward/group_bin_occupancy": 0.6805555555555556, "signal/frontier_ece_reward/group_std_mean": 0.04449153020977974, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0035127611830830575, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0035127611830830575, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33887292742729186, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.751736111111111, "signal/frontier_entropy_batch_reward/group_std_mean": 0.40577629804611204, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.033887290954589845, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033887290954589845, "step": 125 }, { "calibration/aurc": 0.189901764038449, "calibration/batch_distribution_entropy": 0.9520721871515259, "calibration/batch_entropy_100bins": 0.9454743219955299, "calibration/batch_entropy_10bins": 0.9520721871515259, "calibration/batch_entropy_50bins": 0.9555705289341347, "calibration/batch_uniqueness": 0.9463924227699888, "calibration/buffer_distribution_entropy": 0.9391385228864317, "calibration/buffer_entropy_100bins": 0.9380882383339472, "calibration/buffer_entropy_10bins": 0.9391385228864317, "calibration/buffer_entropy_50bins": 0.9495040517659845, "calibration/confidence_entropy": 0.47537128935550743, "calibration/coverage@0%": 0.023279596393052857, "calibration/coverage@1%": 0.023279596393052857, "calibration/coverage@10%": 0.26430076626209964, "calibration/coverage@15%": 0.4489382136469204, "calibration/coverage@20%": 0.6147929671232751, "calibration/coverage@25%": 0.7571622485068487, "calibration/coverage@30%": 0.8580091429839086, "calibration/coverage@5%": 0.14513451380368644, "calibration/ece": 0.08148051823192345, "calibration/mean_confidence": 0.5889737206246585, "calibration/prompt_uniqueness": 0.8531980149590808, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02743055555555556, "completions/max_length": 3838.8, "completions/max_terminated_length": 3838.8, "completions/mean_length": 1067.0234375, "completions/mean_terminated_length": 1097.4159790039062, "completions/min_length": 0.0, "completions/min_terminated_length": 281.8, "epoch": 0.3119961000487494, "grad_norm": 0.00029900847584940493, "learning_rate": 2.349397590361446e-06, "loss": -0.0208, "num_tokens": 321611899.0, "reward": 0.9622433066368103, "reward_std": 0.14578649401664734, "rewards/accuracy_reward": 0.6621527791023254, "rewards/brier_reward": 0.7844874382019043, "rewards/confidence_uniqueness_reward": 0.9220524430274963, "rewards/format_reward": 0.9722222208976745, "rewards/frontier_aurc_reward": -0.0012491632485762238, "rewards/frontier_coverage_0": 0.018586619477719068, "rewards/frontier_coverage_1": 0.018586619477719068, "rewards/frontier_coverage_10": 0.018586619477719068, "rewards/frontier_coverage_15": 0.018586619477719068, "rewards/frontier_coverage_20": 0.018586619477719068, "rewards/frontier_coverage_25": 0.018586619477719068, "rewards/frontier_coverage_5": 0.018586619477719068, "rewards/frontier_ece_reward": 0.015250921249389648, "rewards/frontier_entropy_batch_reward": -0.287339860200882, "signal/accuracy_reward/centered_abs_mean": 0.17348090708255767, "signal/accuracy_reward/group_bin_occupancy": 0.20381944444444447, "signal/accuracy_reward/group_std_mean": 0.22585095167160035, "signal/accuracy_reward/group_zero_std_frac": 0.3694444477558136, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08674045354127884, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08674045354127884, "signal/advantage_abs_mean": 0.11120370775461197, "signal/advantage_pre_scale_abs_mean": 0.11120370775461197, "signal/advantage_pre_scale_std": 0.1740236759185791, "signal/advantage_std": 0.1740236759185791, "signal/brier_reward/centered_abs_mean": 0.14372893869876863, "signal/brier_reward/group_bin_occupancy": 0.836111111111111, "signal/brier_reward/group_std_mean": 0.18368545174598694, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01437289360910654, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01437289360910654, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04728544950485229, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8177083333333334, "signal/confidence_uniqueness_reward/group_std_mean": 0.06920376718044281, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004728544875979424, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004728544875979424, "signal/format_reward/centered_abs_mean": 0.03650173675268888, "signal/format_reward/group_bin_occupancy": 0.14930555555555555, "signal/format_reward/group_std_mean": 0.056186852231621745, "signal/format_reward/group_zero_std_frac": 0.8055555701255799, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01825086837634444, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01825086837634444, "signal/frontier_aurc_reward/centered_abs_mean": 0.001344931242056191, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6895833333333334, "signal/frontier_aurc_reward/group_std_mean": 0.0021691116970032455, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.68116404893226e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.68116404893226e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.1911786049604416, "signal/frontier_coverage_0/group_bin_occupancy": 0.8354166666666666, "signal/frontier_coverage_0/group_std_mean": 0.2510842025279999, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0023897326085716487, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023897326085716487, "signal/frontier_coverage_1/centered_abs_mean": 0.1911786049604416, "signal/frontier_coverage_1/group_bin_occupancy": 0.8354166666666666, "signal/frontier_coverage_1/group_std_mean": 0.2510842025279999, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023897326085716487, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023897326085716487, "signal/frontier_coverage_10/centered_abs_mean": 0.1911786049604416, "signal/frontier_coverage_10/group_bin_occupancy": 0.8354166666666666, "signal/frontier_coverage_10/group_std_mean": 0.2510842025279999, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023897326085716487, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023897326085716487, "signal/frontier_coverage_15/centered_abs_mean": 0.1911786049604416, "signal/frontier_coverage_15/group_bin_occupancy": 0.8354166666666666, "signal/frontier_coverage_15/group_std_mean": 0.2510842025279999, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023897326085716487, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023897326085716487, "signal/frontier_coverage_20/centered_abs_mean": 0.1911786049604416, "signal/frontier_coverage_20/group_bin_occupancy": 0.8354166666666666, "signal/frontier_coverage_20/group_std_mean": 0.2510842025279999, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0023897326085716487, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0023897326085716487, "signal/frontier_coverage_25/centered_abs_mean": 0.1911786049604416, "signal/frontier_coverage_25/group_bin_occupancy": 0.8354166666666666, "signal/frontier_coverage_25/group_std_mean": 0.2510842025279999, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0023897326085716487, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0023897326085716487, "signal/frontier_coverage_5/centered_abs_mean": 0.1911786049604416, "signal/frontier_coverage_5/group_bin_occupancy": 0.8354166666666666, "signal/frontier_coverage_5/group_std_mean": 0.2510842025279999, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023897326085716487, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023897326085716487, "signal/frontier_ece_reward/centered_abs_mean": 0.035830476135015485, "signal/frontier_ece_reward/group_bin_occupancy": 0.6923611111111111, "signal/frontier_ece_reward/group_std_mean": 0.04510410130023956, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.003583047725260258, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.003583047725260258, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32898640632629395, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7326388888888888, "signal/frontier_entropy_batch_reward/group_std_mean": 0.39836318492889405, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03289864137768746, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03289864137768746, "step": 130 }, { "calibration/aurc": 0.1919401835649107, "calibration/batch_distribution_entropy": 0.9360961840279008, "calibration/batch_entropy_100bins": 0.9377248130148897, "calibration/batch_entropy_10bins": 0.9360961840279008, "calibration/batch_entropy_50bins": 0.9453320580648233, "calibration/batch_uniqueness": 0.9424926809265267, "calibration/buffer_distribution_entropy": 0.940487986587587, "calibration/buffer_entropy_100bins": 0.9409743683693346, "calibration/buffer_entropy_10bins": 0.940487986587587, "calibration/buffer_entropy_50bins": 0.9514574364760401, "calibration/confidence_entropy": 0.4645515195275509, "calibration/coverage@0%": 0.05197618847080752, "calibration/coverage@1%": 0.05197618847080752, "calibration/coverage@10%": 0.33362163408868134, "calibration/coverage@15%": 0.4458003166583123, "calibration/coverage@20%": 0.5021032747158289, "calibration/coverage@25%": 0.614310550618702, "calibration/coverage@30%": 0.8264339062488768, "calibration/coverage@5%": 0.2619767454497537, "calibration/ece": 0.14291348187392, "calibration/mean_confidence": 0.6251500991165597, "calibration/prompt_uniqueness": 0.8484265296158569, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.022048611111111137, "completions/max_length": 3967.2, "completions/max_terminated_length": 3967.2, "completions/mean_length": 1073.7470703125, "completions/mean_terminated_length": 1097.9343017578126, "completions/min_length": 0.0, "completions/min_terminated_length": 290.8, "epoch": 0.32399595005062437, "grad_norm": 0.0003013814566656947, "learning_rate": 2.1987951807228917e-06, "loss": -0.0192, "num_tokens": 337074489.0, "reward": 0.9745335817337036, "reward_std": 0.14610227048397065, "rewards/accuracy_reward": 0.6752604246139526, "rewards/brier_reward": 0.8024451017379761, "rewards/confidence_uniqueness_reward": 0.9262871026992798, "rewards/format_reward": 0.9777777791023254, "rewards/frontier_aurc_reward": -0.0011367214610800147, "rewards/frontier_coverage_0": 0.031039434671401977, "rewards/frontier_coverage_1": 0.031039434671401977, "rewards/frontier_coverage_10": 0.031039434671401977, "rewards/frontier_coverage_15": 0.031039434671401977, "rewards/frontier_coverage_20": 0.031039434671401977, "rewards/frontier_coverage_25": 0.031039434671401977, "rewards/frontier_coverage_5": 0.031039434671401977, "rewards/frontier_ece_reward": 0.019231295213103294, "rewards/frontier_entropy_batch_reward": -0.2948362112045288, "signal/accuracy_reward/centered_abs_mean": 0.163134765625, "signal/accuracy_reward/group_bin_occupancy": 0.20451388888888888, "signal/accuracy_reward/group_std_mean": 0.21833232939243316, "signal/accuracy_reward/group_zero_std_frac": 0.3638888895511627, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0815673828125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0815673828125, "signal/advantage_abs_mean": 0.10507439076900482, "signal/advantage_pre_scale_abs_mean": 0.10507439076900482, "signal/advantage_pre_scale_std": 0.17383444905281067, "signal/advantage_std": 0.17383444905281067, "signal/brier_reward/centered_abs_mean": 0.1440066486597061, "signal/brier_reward/group_bin_occupancy": 0.8013888888888889, "signal/brier_reward/group_std_mean": 0.18859705626964568, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014400665648281574, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014400665648281574, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04786202013492584, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.7704861111111111, "signal/confidence_uniqueness_reward/group_std_mean": 0.07885360568761826, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004786202218383551, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004786202218383551, "signal/format_reward/centered_abs_mean": 0.0367078997194767, "signal/format_reward/group_bin_occupancy": 0.15694444444444444, "signal/format_reward/group_std_mean": 0.0658931627869606, "signal/format_reward/group_zero_std_frac": 0.7444444417953491, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01835394985973835, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01835394985973835, "signal/frontier_aurc_reward/centered_abs_mean": 0.0014502544421702624, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6881944444444444, "signal/frontier_aurc_reward/group_std_mean": 0.002394520537927747, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.8128181181964464e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.8128181181964464e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.18389809429645537, "signal/frontier_coverage_0/group_bin_occupancy": 0.8100694444444445, "signal/frontier_coverage_0/group_std_mean": 0.2444453001022339, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0022987262811511753, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022987262811511753, "signal/frontier_coverage_1/centered_abs_mean": 0.18389809429645537, "signal/frontier_coverage_1/group_bin_occupancy": 0.8100694444444445, "signal/frontier_coverage_1/group_std_mean": 0.2444453001022339, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022987262811511753, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022987262811511753, "signal/frontier_coverage_10/centered_abs_mean": 0.18389809429645537, "signal/frontier_coverage_10/group_bin_occupancy": 0.8100694444444445, "signal/frontier_coverage_10/group_std_mean": 0.2444453001022339, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022987262811511753, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022987262811511753, "signal/frontier_coverage_15/centered_abs_mean": 0.18389809429645537, "signal/frontier_coverage_15/group_bin_occupancy": 0.8100694444444445, "signal/frontier_coverage_15/group_std_mean": 0.2444453001022339, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0022987262811511753, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022987262811511753, "signal/frontier_coverage_20/centered_abs_mean": 0.18389809429645537, "signal/frontier_coverage_20/group_bin_occupancy": 0.8100694444444445, "signal/frontier_coverage_20/group_std_mean": 0.2444453001022339, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0022987262811511753, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0022987262811511753, "signal/frontier_coverage_25/centered_abs_mean": 0.18389809429645537, "signal/frontier_coverage_25/group_bin_occupancy": 0.8100694444444445, "signal/frontier_coverage_25/group_std_mean": 0.2444453001022339, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0022987262811511753, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022987262811511753, "signal/frontier_coverage_5/centered_abs_mean": 0.18389809429645537, "signal/frontier_coverage_5/group_bin_occupancy": 0.8100694444444445, "signal/frontier_coverage_5/group_std_mean": 0.2444453001022339, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0022987262811511753, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022987262811511753, "signal/frontier_ece_reward/centered_abs_mean": 0.03592751622200012, "signal/frontier_ece_reward/group_bin_occupancy": 0.679861111111111, "signal/frontier_ece_reward/group_std_mean": 0.04412141665816307, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.003592751733958721, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.003592751733958721, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3293720781803131, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7475694444444445, "signal/frontier_entropy_batch_reward/group_std_mean": 0.398487514257431, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03293720856308937, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03293720856308937, "step": 135 }, { "calibration/aurc": 0.12348685325559985, "calibration/batch_distribution_entropy": 0.9452073614807169, "calibration/batch_entropy_100bins": 0.9462193369534104, "calibration/batch_entropy_10bins": 0.9452073614807169, "calibration/batch_entropy_50bins": 0.9547051944481133, "calibration/batch_uniqueness": 0.9456989720112439, "calibration/buffer_distribution_entropy": 0.9442883870516695, "calibration/buffer_entropy_100bins": 0.9466428389842558, "calibration/buffer_entropy_10bins": 0.9442883870516695, "calibration/buffer_entropy_50bins": 0.9555018867627533, "calibration/confidence_entropy": 0.4717980803303433, "calibration/coverage@0%": 0.05851068594704365, "calibration/coverage@1%": 0.05851068594704365, "calibration/coverage@10%": 0.4795897983929943, "calibration/coverage@15%": 0.7379147049577794, "calibration/coverage@20%": 0.822024690104827, "calibration/coverage@25%": 0.9007581452775323, "calibration/coverage@30%": 0.9805801689932363, "calibration/coverage@5%": 0.21591114286610366, "calibration/ece": 0.13830600383937194, "calibration/mean_confidence": 0.6107551203312859, "calibration/prompt_uniqueness": 0.8445896458858178, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02256944444444444, "completions/max_length": 3882.6, "completions/max_terminated_length": 3882.6, "completions/mean_length": 1070.8292358398437, "completions/mean_terminated_length": 1095.48896484375, "completions/min_length": 0.0, "completions/min_terminated_length": 360.4, "epoch": 0.33599580005249935, "grad_norm": 0.00026106671430170536, "learning_rate": 2.0481927710843377e-06, "loss": -0.0181, "num_tokens": 352514666.0, "reward": 0.9759422659873962, "reward_std": 0.13966879844665528, "rewards/accuracy_reward": 0.6809027910232544, "rewards/brier_reward": 0.7947320222854615, "rewards/confidence_uniqueness_reward": 0.927030086517334, "rewards/format_reward": 0.9770833492279053, "rewards/frontier_aurc_reward": -0.0010663935798220336, "rewards/frontier_coverage_0": 0.01920067030005157, "rewards/frontier_coverage_1": 0.01920067030005157, "rewards/frontier_coverage_10": 0.01920067030005157, "rewards/frontier_coverage_15": 0.01920067030005157, "rewards/frontier_coverage_20": 0.01920067030005157, "rewards/frontier_coverage_25": 0.01920067030005157, "rewards/frontier_coverage_5": 0.01920067030005157, "rewards/frontier_ece_reward": 0.014913215488195419, "rewards/frontier_entropy_batch_reward": -0.28385027050971984, "signal/accuracy_reward/centered_abs_mean": 0.15207248330116271, "signal/accuracy_reward/group_bin_occupancy": 0.19965277777777776, "signal/accuracy_reward/group_std_mean": 0.2055516541004181, "signal/accuracy_reward/group_zero_std_frac": 0.40277778506278994, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07603624165058136, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07603624165058136, "signal/advantage_abs_mean": 0.10126451849937439, "signal/advantage_pre_scale_abs_mean": 0.10126451849937439, "signal/advantage_pre_scale_std": 0.16675151288509368, "signal/advantage_std": 0.16675151288509368, "signal/brier_reward/centered_abs_mean": 0.14222416579723357, "signal/brier_reward/group_bin_occupancy": 0.8145833333333332, "signal/brier_reward/group_std_mean": 0.18435073792934417, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014222417026758194, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014222417026758194, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.043567462265491484, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.7763888888888889, "signal/confidence_uniqueness_reward/group_std_mean": 0.07227423414587975, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00435674637556076, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00435674637556076, "signal/format_reward/centered_abs_mean": 0.03331163227558136, "signal/format_reward/group_bin_occupancy": 0.15555555555555556, "signal/format_reward/group_std_mean": 0.06039545834064484, "signal/format_reward/group_zero_std_frac": 0.7555555582046509, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01665581613779068, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01665581613779068, "signal/frontier_aurc_reward/centered_abs_mean": 0.0013383281184360385, "signal/frontier_aurc_reward/group_bin_occupancy": 0.7003472222222221, "signal/frontier_aurc_reward/group_std_mean": 0.002201914181932807, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.672910220804624e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.672910220804624e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.1825083911418915, "signal/frontier_coverage_0/group_bin_occupancy": 0.8243055555555555, "signal/frontier_coverage_0/group_std_mean": 0.24250001609325408, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0022813548799604177, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022813548799604177, "signal/frontier_coverage_1/centered_abs_mean": 0.1825083911418915, "signal/frontier_coverage_1/group_bin_occupancy": 0.8243055555555555, "signal/frontier_coverage_1/group_std_mean": 0.24250001609325408, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022813548799604177, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022813548799604177, "signal/frontier_coverage_10/centered_abs_mean": 0.1825083911418915, "signal/frontier_coverage_10/group_bin_occupancy": 0.8243055555555555, "signal/frontier_coverage_10/group_std_mean": 0.24250001609325408, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022813548799604177, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022813548799604177, "signal/frontier_coverage_15/centered_abs_mean": 0.1825083911418915, "signal/frontier_coverage_15/group_bin_occupancy": 0.8243055555555555, "signal/frontier_coverage_15/group_std_mean": 0.24250001609325408, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0022813548799604177, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022813548799604177, "signal/frontier_coverage_20/centered_abs_mean": 0.1825083911418915, "signal/frontier_coverage_20/group_bin_occupancy": 0.8243055555555555, "signal/frontier_coverage_20/group_std_mean": 0.24250001609325408, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0022813548799604177, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0022813548799604177, "signal/frontier_coverage_25/centered_abs_mean": 0.1825083911418915, "signal/frontier_coverage_25/group_bin_occupancy": 0.8243055555555555, "signal/frontier_coverage_25/group_std_mean": 0.24250001609325408, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0022813548799604177, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022813548799604177, "signal/frontier_coverage_5/centered_abs_mean": 0.1825083911418915, "signal/frontier_coverage_5/group_bin_occupancy": 0.8243055555555555, "signal/frontier_coverage_5/group_std_mean": 0.24250001609325408, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0022813548799604177, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022813548799604177, "signal/frontier_ece_reward/centered_abs_mean": 0.03229107595980167, "signal/frontier_ece_reward/group_bin_occupancy": 0.6559027777777777, "signal/frontier_ece_reward/group_std_mean": 0.04013029932975769, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.003229107800871134, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.003229107800871134, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3295544445514679, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.751736111111111, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3994203209877014, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03295544609427452, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03295544609427452, "step": 140 }, { "calibration/aurc": 0.16963840721934928, "calibration/batch_distribution_entropy": 0.9839276501014798, "calibration/batch_entropy_100bins": 0.9645983079691083, "calibration/batch_entropy_10bins": 0.9839276501014798, "calibration/batch_entropy_50bins": 0.9761114279031353, "calibration/batch_uniqueness": 0.9537371663907928, "calibration/buffer_distribution_entropy": 0.9547933136371283, "calibration/buffer_entropy_100bins": 0.9584469083864826, "calibration/buffer_entropy_10bins": 0.9547933136371283, "calibration/buffer_entropy_50bins": 0.9646369849007005, "calibration/confidence_entropy": 0.4893747832641259, "calibration/coverage@0%": 0.07835068493854391, "calibration/coverage@1%": 0.13061735160521057, "calibration/coverage@10%": 0.3460718360292577, "calibration/coverage@15%": 0.49166136873047694, "calibration/coverage@20%": 0.6608290070486337, "calibration/coverage@25%": 0.7759027731944482, "calibration/coverage@30%": 0.835348769127411, "calibration/coverage@5%": 0.17083356782142678, "calibration/ece": 0.12891986027646224, "calibration/mean_confidence": 0.531905021969054, "calibration/prompt_uniqueness": 0.8497211296793032, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02022569444444442, "completions/max_length": 3858.6, "completions/max_terminated_length": 3858.6, "completions/mean_length": 1056.7715454101562, "completions/mean_terminated_length": 1078.7728881835938, "completions/min_length": 0.0, "completions/min_terminated_length": 340.4, "epoch": 0.34799565005437433, "grad_norm": 0.0002779490314424038, "learning_rate": 1.8975903614457832e-06, "loss": -0.0175, "num_tokens": 367753282.0, "reward": 0.9878696322441101, "reward_std": 0.13212112337350845, "rewards/accuracy_reward": 0.6973958134651184, "rewards/brier_reward": 0.798538327217102, "rewards/confidence_uniqueness_reward": 0.931287407875061, "rewards/format_reward": 0.9796875, "rewards/frontier_aurc_reward": -0.0010055402875877918, "rewards/frontier_coverage_0": 0.01543128564953804, "rewards/frontier_coverage_1": 0.01543128564953804, "rewards/frontier_coverage_10": 0.01543128564953804, "rewards/frontier_coverage_15": 0.01543128564953804, "rewards/frontier_coverage_20": 0.01543128564953804, "rewards/frontier_coverage_25": 0.01778712384402752, "rewards/frontier_coverage_5": 0.01543128564953804, "rewards/frontier_ece_reward": 0.009699131641536952, "rewards/frontier_entropy_batch_reward": -0.2599165678024292, "signal/accuracy_reward/centered_abs_mean": 0.14628906548023224, "signal/accuracy_reward/group_bin_occupancy": 0.19895833333333335, "signal/accuracy_reward/group_std_mean": 0.19899411499500275, "signal/accuracy_reward/group_zero_std_frac": 0.4083333373069763, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07314453274011612, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07314453274011612, "signal/advantage_abs_mean": 0.0954915538430214, "signal/advantage_pre_scale_abs_mean": 0.0954915538430214, "signal/advantage_pre_scale_std": 0.16041628420352935, "signal/advantage_std": 0.16041628420352935, "signal/brier_reward/centered_abs_mean": 0.13802383542060853, "signal/brier_reward/group_bin_occupancy": 0.8170138888888889, "signal/brier_reward/group_std_mean": 0.17916561365127565, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01380238328129053, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01380238328129053, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.041919562965631485, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8003472222222221, "signal/confidence_uniqueness_reward/group_std_mean": 0.06718875169754028, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00419195624999702, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00419195624999702, "signal/format_reward/centered_abs_mean": 0.03240017406642437, "signal/format_reward/group_bin_occupancy": 0.15208333333333335, "signal/format_reward/group_std_mean": 0.05611613690853119, "signal/format_reward/group_zero_std_frac": 0.7833333373069763, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.016200087033212185, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.016200087033212185, "signal/frontier_aurc_reward/centered_abs_mean": 0.001244223420508206, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6961805555555556, "signal/frontier_aurc_reward/group_std_mean": 0.002067322516813874, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.555279395688558e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.555279395688558e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.18761368095874786, "signal/frontier_coverage_0/group_bin_occupancy": 0.809375, "signal/frontier_coverage_0/group_std_mean": 0.24638648331165314, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0023451711051166056, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023451711051166056, "signal/frontier_coverage_1/centered_abs_mean": 0.18761368095874786, "signal/frontier_coverage_1/group_bin_occupancy": 0.809375, "signal/frontier_coverage_1/group_std_mean": 0.24638648331165314, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023451711051166056, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023451711051166056, "signal/frontier_coverage_10/centered_abs_mean": 0.18761368095874786, "signal/frontier_coverage_10/group_bin_occupancy": 0.809375, "signal/frontier_coverage_10/group_std_mean": 0.24638648331165314, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023451711051166056, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023451711051166056, "signal/frontier_coverage_15/centered_abs_mean": 0.18761368095874786, "signal/frontier_coverage_15/group_bin_occupancy": 0.809375, "signal/frontier_coverage_15/group_std_mean": 0.24638648331165314, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023451711051166056, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023451711051166056, "signal/frontier_coverage_20/centered_abs_mean": 0.18761368095874786, "signal/frontier_coverage_20/group_bin_occupancy": 0.809375, "signal/frontier_coverage_20/group_std_mean": 0.24638648331165314, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0023451711051166056, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0023451711051166056, "signal/frontier_coverage_25/centered_abs_mean": 0.16378130316734313, "signal/frontier_coverage_25/group_bin_occupancy": 0.803125, "signal/frontier_coverage_25/group_std_mean": 0.21585616767406463, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0020472663221880794, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020472663221880794, "signal/frontier_coverage_5/centered_abs_mean": 0.18761368095874786, "signal/frontier_coverage_5/group_bin_occupancy": 0.809375, "signal/frontier_coverage_5/group_std_mean": 0.24638648331165314, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023451711051166056, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023451711051166056, "signal/frontier_ece_reward/centered_abs_mean": 0.02758239060640335, "signal/frontier_ece_reward/group_bin_occupancy": 0.648611111111111, "signal/frontier_ece_reward/group_std_mean": 0.03442221805453301, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0027582390699535607, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0027582390699535607, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31868948936462405, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7479166666666667, "signal/frontier_entropy_batch_reward/group_std_mean": 0.38932323455810547, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03186894841492176, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03186894841492176, "step": 145 }, { "calibration/aurc": 0.14581408637367196, "calibration/batch_distribution_entropy": 0.9620467147080662, "calibration/batch_entropy_100bins": 0.9521593826962524, "calibration/batch_entropy_10bins": 0.9620467147080662, "calibration/batch_entropy_50bins": 0.9628477325770243, "calibration/batch_uniqueness": 0.9493508453284983, "calibration/buffer_distribution_entropy": 0.964346395839352, "calibration/buffer_entropy_100bins": 0.9687164702434309, "calibration/buffer_entropy_10bins": 0.964346395839352, "calibration/buffer_entropy_50bins": 0.9727008787721898, "calibration/confidence_entropy": 0.48253928652287603, "calibration/coverage@0%": 0.11276037364275815, "calibration/coverage@1%": 0.13155932925633518, "calibration/coverage@10%": 0.5150240243089297, "calibration/coverage@15%": 0.5969002751000211, "calibration/coverage@20%": 0.6727406402156719, "calibration/coverage@25%": 0.760355493197759, "calibration/coverage@30%": 0.8286161138459842, "calibration/coverage@5%": 0.41456603762425204, "calibration/ece": 0.1701717909819578, "calibration/mean_confidence": 0.5407732633406844, "calibration/prompt_uniqueness": 0.8612606071323716, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016319444444444442, "completions/max_length": 3997.8, "completions/max_terminated_length": 3997.8, "completions/mean_length": 1132.7577880859376, "completions/mean_terminated_length": 1151.5529541015626, "completions/min_length": 0.0, "completions/min_terminated_length": 325.6, "epoch": 0.3599955000562493, "grad_norm": 0.00029174465453252196, "learning_rate": 1.7469879518072292e-06, "loss": -0.0143, "num_tokens": 383912988.0, "reward": 0.9861086249351502, "reward_std": 0.14031601548194886, "rewards/accuracy_reward": 0.6947916626930237, "rewards/brier_reward": 0.8056597113609314, "rewards/confidence_uniqueness_reward": 0.9327221870422363, "rewards/format_reward": 0.9835069417953491, "rewards/frontier_aurc_reward": -0.0011077008675783873, "rewards/frontier_coverage_0": 0.01600627228617668, "rewards/frontier_coverage_1": 0.01600627228617668, "rewards/frontier_coverage_10": 0.01600627228617668, "rewards/frontier_coverage_15": 0.01600627228617668, "rewards/frontier_coverage_20": 0.019521726109087468, "rewards/frontier_coverage_25": 0.04305166006088257, "rewards/frontier_coverage_5": 0.01600627228617668, "rewards/frontier_ece_reward": 0.006581637542694807, "rewards/frontier_entropy_batch_reward": -0.2930577486753464, "signal/accuracy_reward/centered_abs_mean": 0.1642144054174423, "signal/accuracy_reward/group_bin_occupancy": 0.2013888888888889, "signal/accuracy_reward/group_std_mean": 0.21578683853149414, "signal/accuracy_reward/group_zero_std_frac": 0.3888888955116272, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08210720270872116, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08210720270872116, "signal/advantage_abs_mean": 0.10177487134933472, "signal/advantage_pre_scale_abs_mean": 0.10177487134933472, "signal/advantage_pre_scale_std": 0.16584074795246123, "signal/advantage_std": 0.16584074795246123, "signal/brier_reward/centered_abs_mean": 0.1371104210615158, "signal/brier_reward/group_bin_occupancy": 0.8177083333333333, "signal/brier_reward/group_std_mean": 0.18097830712795257, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013711042888462543, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013711042888462543, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03890909440815449, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.7947916666666666, "signal/confidence_uniqueness_reward/group_std_mean": 0.0675606332719326, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003890909440815449, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003890909440815449, "signal/format_reward/centered_abs_mean": 0.02860243022441864, "signal/format_reward/group_bin_occupancy": 0.1545138888888889, "signal/format_reward/group_std_mean": 0.055647566169500354, "signal/format_reward/group_zero_std_frac": 0.7638889074325561, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01430121511220932, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01430121511220932, "signal/frontier_aurc_reward/centered_abs_mean": 0.0015429736115038395, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6982638888888889, "signal/frontier_aurc_reward/group_std_mean": 0.002718376787379384, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.9287169561721384e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.9287169561721384e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.18298667073249816, "signal/frontier_coverage_0/group_bin_occupancy": 0.8253472222222221, "signal/frontier_coverage_0/group_std_mean": 0.24271575808525087, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002287333458662033, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002287333458662033, "signal/frontier_coverage_1/centered_abs_mean": 0.18298667073249816, "signal/frontier_coverage_1/group_bin_occupancy": 0.8253472222222221, "signal/frontier_coverage_1/group_std_mean": 0.24271575808525087, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002287333458662033, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002287333458662033, "signal/frontier_coverage_10/centered_abs_mean": 0.18298667073249816, "signal/frontier_coverage_10/group_bin_occupancy": 0.8253472222222221, "signal/frontier_coverage_10/group_std_mean": 0.24271575808525087, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002287333458662033, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002287333458662033, "signal/frontier_coverage_15/centered_abs_mean": 0.18298667073249816, "signal/frontier_coverage_15/group_bin_occupancy": 0.8253472222222221, "signal/frontier_coverage_15/group_std_mean": 0.24271575808525087, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002287333458662033, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002287333458662033, "signal/frontier_coverage_20/centered_abs_mean": 0.1552409678697586, "signal/frontier_coverage_20/group_bin_occupancy": 0.8149305555555555, "signal/frontier_coverage_20/group_std_mean": 0.20808847844600678, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0019405121915042401, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0019405121915042401, "signal/frontier_coverage_25/centered_abs_mean": 0.07173716872930527, "signal/frontier_coverage_25/group_bin_occupancy": 0.8854166666666666, "signal/frontier_coverage_25/group_std_mean": 0.0958094283938408, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0008967146510258317, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008967146510258317, "signal/frontier_coverage_5/centered_abs_mean": 0.18298667073249816, "signal/frontier_coverage_5/group_bin_occupancy": 0.8253472222222221, "signal/frontier_coverage_5/group_std_mean": 0.24271575808525087, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002287333458662033, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002287333458662033, "signal/frontier_ece_reward/centered_abs_mean": 0.02298327349126339, "signal/frontier_ece_reward/group_bin_occupancy": 0.7138888888888888, "signal/frontier_ece_reward/group_std_mean": 0.028938150405883788, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0022983273956924677, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0022983273956924677, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3349026620388031, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7590277777777777, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4039584219455719, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.033490267023444174, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033490267023444174, "step": 150 }, { "epoch": 0.3599955000562493, "eval_calibration/aurc": 0.14166378509288816, "eval_calibration/batch_distribution_entropy": 0.8977435367759877, "eval_calibration/batch_entropy_100bins": 0.6974634442420786, "eval_calibration/batch_entropy_10bins": 0.8977435367759877, "eval_calibration/batch_entropy_50bins": 0.7715932785376074, "eval_calibration/batch_uniqueness": 0.8898039151925078, "eval_calibration/buffer_distribution_entropy": 0.9691951475996244, "eval_calibration/buffer_entropy_100bins": 0.974236048816972, "eval_calibration/buffer_entropy_10bins": 0.9691951475996244, "eval_calibration/buffer_entropy_50bins": 0.9768814779586684, "eval_calibration/confidence_entropy": 0.49954160875782266, "eval_calibration/coverage@0%": 0.235383064516129, "eval_calibration/coverage@1%": 0.235383064516129, "eval_calibration/coverage@10%": 0.40305779569892475, "eval_calibration/coverage@15%": 0.5608198924731183, "eval_calibration/coverage@20%": 0.766633064516129, "eval_calibration/coverage@25%": 0.9040658602150536, "eval_calibration/coverage@30%": 0.9786626344086021, "eval_calibration/coverage@5%": 0.235383064516129, "eval_calibration/ece": 0.17516353628706025, "eval_calibration/mean_confidence": 0.5908915400357312, "eval_calibration/prompt_uniqueness": 0.8898039151925078, "eval_completions/clipped_ratio": 0.016493055555555563, "eval_completions/max_length": 3125.1666666666665, "eval_completions/max_terminated_length": 3125.1666666666665, "eval_completions/mean_length": 1075.0045369466145, "eval_completions/mean_terminated_length": 1092.6835530598958, "eval_completions/min_length": 76.0, "eval_completions/min_terminated_length": 414.1666666666667, "eval_loss": 0.0, "eval_num_tokens": 383912988.0, "eval_reward": 0.9052900274594625, "eval_reward_std": 0.2462936962644259, "eval_rewards/accuracy_reward": 0.6831597089767456, "eval_rewards/brier_reward": 0.8052956362565359, "eval_rewards/confidence_uniqueness_reward": 0.8782645761966705, "eval_rewards/format_reward": 0.9800347288449606, "eval_rewards/frontier_aurc_reward": -0.0012432806252036244, "eval_rewards/frontier_coverage_0": 0.024572810948787566, "eval_rewards/frontier_coverage_1": 0.024572810948787566, "eval_rewards/frontier_coverage_10": 0.024572810948787566, "eval_rewards/frontier_coverage_15": 0.024731364154528517, "eval_rewards/frontier_coverage_20": 0.0300130230995516, "eval_rewards/frontier_coverage_25": 0.0688376184552908, "eval_rewards/frontier_coverage_5": 0.024572810948787566, "eval_rewards/frontier_ece_reward": 0.005823705461807549, "eval_rewards/frontier_entropy_batch_reward": -0.9800347288449606, "eval_runtime": 215.2066, "eval_samples_per_second": 4.647, "eval_signal/accuracy_reward/centered_abs_mean": 0.4214952240387599, "eval_signal/accuracy_reward/group_bin_occupancy": 0.25, "eval_signal/accuracy_reward/group_std_mean": 0.46546245117982227, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.21074761201937994, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.21074761201937994, "eval_signal/advantage_abs_mean": 0.20943692326545715, "eval_signal/advantage_pre_scale_abs_mean": 0.20943692326545715, "eval_signal/advantage_pre_scale_std": 0.2453278973698616, "eval_signal/advantage_std": 0.2453278973698616, "eval_signal/brier_reward/centered_abs_mean": 0.18495392551024756, "eval_signal/brier_reward/group_bin_occupancy": 0.84375, "eval_signal/brier_reward/group_std_mean": 0.2431354746222496, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018495393606523674, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.018495393606523674, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.061455123126506805, "eval_signal/confidence_uniqueness_reward/group_bin_occupancy": 0.3611111111111111, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.10940167804559071, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0061455123747388525, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0061455123747388525, "eval_signal/format_reward/centered_abs_mean": 0.03781467008714875, "eval_signal/format_reward/group_bin_occupancy": 0.18055555555555555, "eval_signal/format_reward/group_std_mean": 0.09148847094426553, "eval_signal/format_reward/group_zero_std_frac": 0.5555555696288744, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.018907335043574374, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.018907335043574374, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0021643370661574104, "eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.611111111111111, "eval_signal/frontier_aurc_reward/group_std_mean": 0.004351850405024986, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.7054214115196373e-05, "eval_signal/frontier_aurc_reward/weight": 0.012500000186264515, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.7054214115196373e-05, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.25391770899295807, "eval_signal/frontier_coverage_0/group_bin_occupancy": 0.9236111111111112, "eval_signal/frontier_coverage_0/group_std_mean": 0.3627063085635503, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0031739713546509543, "eval_signal/frontier_coverage_0/weight": 0.012500000186264515, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0031739713546509543, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.25391770899295807, "eval_signal/frontier_coverage_1/group_bin_occupancy": 0.9236111111111112, "eval_signal/frontier_coverage_1/group_std_mean": 0.3627063085635503, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0031739713546509543, "eval_signal/frontier_coverage_1/weight": 0.012500000186264515, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0031739713546509543, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.25391770899295807, "eval_signal/frontier_coverage_10/group_bin_occupancy": 0.9236111111111112, "eval_signal/frontier_coverage_10/group_std_mean": 0.3627063085635503, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0031739713546509543, "eval_signal/frontier_coverage_10/weight": 0.012500000186264515, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0031739713546509543, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.2522597908973694, "eval_signal/frontier_coverage_15/group_bin_occupancy": 0.9201388888888888, "eval_signal/frontier_coverage_15/group_std_mean": 0.3606320917606354, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0031532473706950745, "eval_signal/frontier_coverage_15/weight": 0.012500000186264515, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0031532473706950745, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.15832207848628363, "eval_signal/frontier_coverage_20/group_bin_occupancy": 0.8854166666666666, "eval_signal/frontier_coverage_20/group_std_mean": 0.24005423734585443, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001979026031525185, "eval_signal/frontier_coverage_20/weight": 0.012500000186264515, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001979026031525185, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.0908020759622256, "eval_signal/frontier_coverage_25/group_bin_occupancy": 0.9479166666666669, "eval_signal/frontier_coverage_25/group_std_mean": 0.11604747300346692, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011350259883329272, "eval_signal/frontier_coverage_25/weight": 0.012500000186264515, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011350259883329272, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.25391770899295807, "eval_signal/frontier_coverage_5/group_bin_occupancy": 0.9236111111111112, "eval_signal/frontier_coverage_5/group_std_mean": 0.3627063085635503, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0031739713546509543, "eval_signal/frontier_coverage_5/weight": 0.012500000186264515, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0031739713546509543, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.026654658528665703, "eval_signal/frontier_ece_reward/group_bin_occupancy": 0.8993055555555555, "eval_signal/frontier_ece_reward/group_std_mean": 0.03542460377017657, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0026654657752563557, "eval_signal/frontier_ece_reward/weight": 0.10000000149011612, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0026654657752563557, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.03781467008714875, "eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.18055555555555555, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.09148847094426553, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.5555555696288744, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0037814672493065395, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0037814672493065395, "eval_steps_per_second": 0.028, "step": 150 }, { "calibration/aurc": 0.1589188574324561, "calibration/batch_distribution_entropy": 0.9682778490933727, "calibration/batch_entropy_100bins": 0.9573909052133722, "calibration/batch_entropy_10bins": 0.9682778490933727, "calibration/batch_entropy_50bins": 0.966160103078671, "calibration/batch_uniqueness": 0.9501737534173381, "calibration/buffer_distribution_entropy": 0.9715026240037516, "calibration/buffer_entropy_100bins": 0.9773167131994699, "calibration/buffer_entropy_10bins": 0.9715026240037516, "calibration/buffer_entropy_50bins": 0.9790737695937416, "calibration/confidence_entropy": 0.5003052797857543, "calibration/coverage@0%": 0.016352829482242193, "calibration/coverage@1%": 0.016352829482242193, "calibration/coverage@10%": 0.48774055947796324, "calibration/coverage@15%": 0.5995304254029382, "calibration/coverage@20%": 0.6832017543859649, "calibration/coverage@25%": 0.8128728070175437, "calibration/coverage@30%": 0.8958662280701754, "calibration/coverage@5%": 0.059086569319640556, "calibration/ece": 0.17449626019352468, "calibration/mean_confidence": 0.5850255241339524, "calibration/prompt_uniqueness": 0.8585340912346615, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013802083333333326, "completions/max_length": 3833.6, "completions/max_terminated_length": 3833.6, "completions/mean_length": 1044.0293334960938, "completions/mean_terminated_length": 1058.7995727539062, "completions/min_length": 0.0, "completions/min_terminated_length": 336.8, "epoch": 0.3719953500581243, "grad_norm": 0.0002921310951933265, "learning_rate": 1.5963855421686747e-06, "loss": -0.0106, "num_tokens": 399047918.0, "reward": 1.010855793952942, "reward_std": 0.13225770443677903, "rewards/accuracy_reward": 0.7420138955116272, "rewards/brier_reward": 0.8184032678604126, "rewards/confidence_uniqueness_reward": 0.9341031432151794, "rewards/format_reward": 0.9859375, "rewards/frontier_aurc_reward": -0.0009835207951255144, "rewards/frontier_coverage_0": -0.003601994086056948, "rewards/frontier_coverage_1": -0.003601994086056948, "rewards/frontier_coverage_10": -0.003601994086056948, "rewards/frontier_coverage_15": 0.0005078878486528993, "rewards/frontier_coverage_20": 0.021516397967934607, "rewards/frontier_coverage_25": 0.09996354579925537, "rewards/frontier_coverage_5": -0.003601994086056948, "rewards/frontier_ece_reward": 0.002238374725857284, "rewards/frontier_entropy_batch_reward": -0.2992683291435242, "signal/accuracy_reward/centered_abs_mean": 0.15916883647441865, "signal/accuracy_reward/group_bin_occupancy": 0.19791666666666666, "signal/accuracy_reward/group_std_mean": 0.20814797878265381, "signal/accuracy_reward/group_zero_std_frac": 0.4166666746139526, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07958441823720933, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07958441823720933, "signal/advantage_abs_mean": 0.09818726927042007, "signal/advantage_pre_scale_abs_mean": 0.09818726927042007, "signal/advantage_pre_scale_std": 0.1613670289516449, "signal/advantage_std": 0.1613670289516449, "signal/brier_reward/centered_abs_mean": 0.12828820943832397, "signal/brier_reward/group_bin_occupancy": 0.8326388888888889, "signal/brier_reward/group_std_mean": 0.16683202385902404, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012828820943832397, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012828820943832397, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03536626324057579, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8340277777777778, "signal/confidence_uniqueness_reward/group_std_mean": 0.05673680827021599, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003536626137793064, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003536626137793064, "signal/format_reward/centered_abs_mean": 0.02351345494389534, "signal/format_reward/group_bin_occupancy": 0.14583333333333334, "signal/format_reward/group_std_mean": 0.04251343086361885, "signal/format_reward/group_zero_std_frac": 0.8333333373069763, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01175672747194767, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01175672747194767, "signal/frontier_aurc_reward/centered_abs_mean": 0.0015422179130837321, "signal/frontier_aurc_reward/group_bin_occupancy": 0.717013888888889, "signal/frontier_aurc_reward/group_std_mean": 0.0027723016683012247, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.927772464114241e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.927772464114241e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.16945272982120513, "signal/frontier_coverage_0/group_bin_occupancy": 0.8222222222222222, "signal/frontier_coverage_0/group_std_mean": 0.22849854230880737, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00211815913207829, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00211815913207829, "signal/frontier_coverage_1/centered_abs_mean": 0.16945272982120513, "signal/frontier_coverage_1/group_bin_occupancy": 0.8222222222222222, "signal/frontier_coverage_1/group_std_mean": 0.22849854230880737, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00211815913207829, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00211815913207829, "signal/frontier_coverage_10/centered_abs_mean": 0.16945272982120513, "signal/frontier_coverage_10/group_bin_occupancy": 0.8222222222222222, "signal/frontier_coverage_10/group_std_mean": 0.22849854230880737, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00211815913207829, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00211815913207829, "signal/frontier_coverage_15/centered_abs_mean": 0.15546642541885375, "signal/frontier_coverage_15/group_bin_occupancy": 0.8138888888888888, "signal/frontier_coverage_15/group_std_mean": 0.2106780767440796, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0019433303037658333, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0019433303037658333, "signal/frontier_coverage_20/centered_abs_mean": 0.08221425265073776, "signal/frontier_coverage_20/group_bin_occupancy": 0.8430555555555556, "signal/frontier_coverage_20/group_std_mean": 0.11387116461992264, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010276781744323672, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010276781744323672, "signal/frontier_coverage_25/centered_abs_mean": 0.07415155619382859, "signal/frontier_coverage_25/group_bin_occupancy": 0.9208333333333334, "signal/frontier_coverage_25/group_std_mean": 0.09463738054037094, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009268944384530186, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009268944384530186, "signal/frontier_coverage_5/centered_abs_mean": 0.16945272982120513, "signal/frontier_coverage_5/group_bin_occupancy": 0.8222222222222222, "signal/frontier_coverage_5/group_std_mean": 0.22849854230880737, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00211815913207829, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00211815913207829, "signal/frontier_ece_reward/centered_abs_mean": 0.019297819957137106, "signal/frontier_ece_reward/group_bin_occupancy": 0.7003472222222222, "signal/frontier_ece_reward/group_std_mean": 0.02474020905792713, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0019297819584608079, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0019297819584608079, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3254176914691925, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7572916666666667, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3945078909397125, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03254176788032055, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03254176788032055, "step": 155 }, { "calibration/aurc": 0.13216890708804582, "calibration/batch_distribution_entropy": 0.9391698914560569, "calibration/batch_entropy_100bins": 0.9427522555652142, "calibration/batch_entropy_10bins": 0.9391698914560569, "calibration/batch_entropy_50bins": 0.9494372446380505, "calibration/batch_uniqueness": 0.9452771036630393, "calibration/buffer_distribution_entropy": 0.9759505965841125, "calibration/buffer_entropy_100bins": 0.9831831344351396, "calibration/buffer_entropy_10bins": 0.9759505965841125, "calibration/buffer_entropy_50bins": 0.9833908527361022, "calibration/confidence_entropy": 0.48900815017571675, "calibration/coverage@0%": 0.06475746949854295, "calibration/coverage@1%": 0.08512300474658473, "calibration/coverage@10%": 0.6281017851935031, "calibration/coverage@15%": 0.7311611629993731, "calibration/coverage@20%": 0.7874305476139628, "calibration/coverage@25%": 0.8517333333333333, "calibration/coverage@30%": 0.8879999999999999, "calibration/coverage@5%": 0.4113532997891937, "calibration/ece": 0.1564265309065293, "calibration/mean_confidence": 0.6150749953655152, "calibration/prompt_uniqueness": 0.8499751975720826, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016927083333333325, "completions/max_length": 3559.4, "completions/max_terminated_length": 3559.4, "completions/mean_length": 1031.0409912109376, "completions/mean_terminated_length": 1048.8473876953126, "completions/min_length": 0.0, "completions/min_terminated_length": 324.4, "epoch": 0.38399520005999926, "grad_norm": 0.0002907900488935411, "learning_rate": 1.4457831325301204e-06, "loss": -0.0123, "num_tokens": 414012806.0, "reward": 0.9759169220924377, "reward_std": 0.1336510330438614, "rewards/accuracy_reward": 0.6717013835906982, "rewards/brier_reward": 0.8032342672348023, "rewards/confidence_uniqueness_reward": 0.9325896739959717, "rewards/format_reward": 0.982899296283722, "rewards/frontier_aurc_reward": -0.001580100622959435, "rewards/frontier_coverage_0": 0.02686268715187907, "rewards/frontier_coverage_1": 0.02686268715187907, "rewards/frontier_coverage_10": 0.02686268715187907, "rewards/frontier_coverage_15": 0.02580845048651099, "rewards/frontier_coverage_20": 0.0352854423224926, "rewards/frontier_coverage_25": 0.1048676148056984, "rewards/frontier_coverage_5": 0.02686268715187907, "rewards/frontier_ece_reward": 0.0036205228650942447, "rewards/frontier_entropy_batch_reward": -0.287257993221283, "signal/accuracy_reward/centered_abs_mean": 0.15773654282093047, "signal/accuracy_reward/group_bin_occupancy": 0.1951388888888889, "signal/accuracy_reward/group_std_mean": 0.20291894674301147, "signal/accuracy_reward/group_zero_std_frac": 0.43888888955116273, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07886827141046523, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07886827141046523, "signal/advantage_abs_mean": 0.0999557062983513, "signal/advantage_pre_scale_abs_mean": 0.0999557062983513, "signal/advantage_pre_scale_std": 0.1613948255777359, "signal/advantage_std": 0.1613948255777359, "signal/brier_reward/centered_abs_mean": 0.13456721603870392, "signal/brier_reward/group_bin_occupancy": 0.8225694444444445, "signal/brier_reward/group_std_mean": 0.17532566785812378, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013456722162663937, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013456722162663937, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03702561557292938, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8114583333333334, "signal/confidence_uniqueness_reward/group_std_mean": 0.062202471494674685, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003702561743557453, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003702561743557453, "signal/format_reward/centered_abs_mean": 0.02643771693110466, "signal/format_reward/group_bin_occupancy": 0.15069444444444444, "signal/format_reward/group_std_mean": 0.04977613650262356, "signal/format_reward/group_zero_std_frac": 0.794444453716278, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01321885846555233, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01321885846555233, "signal/frontier_aurc_reward/centered_abs_mean": 0.001971296383999288, "signal/frontier_aurc_reward/group_bin_occupancy": 0.7038194444444443, "signal/frontier_aurc_reward/group_std_mean": 0.0035364361479878425, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.4641206255182625e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.4641206255182625e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.18115276098251343, "signal/frontier_coverage_0/group_bin_occupancy": 0.8267361111111111, "signal/frontier_coverage_0/group_std_mean": 0.237866547703743, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002264409465715289, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002264409465715289, "signal/frontier_coverage_1/centered_abs_mean": 0.18115276098251343, "signal/frontier_coverage_1/group_bin_occupancy": 0.8267361111111111, "signal/frontier_coverage_1/group_std_mean": 0.237866547703743, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002264409465715289, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002264409465715289, "signal/frontier_coverage_10/centered_abs_mean": 0.18115276098251343, "signal/frontier_coverage_10/group_bin_occupancy": 0.8267361111111111, "signal/frontier_coverage_10/group_std_mean": 0.237866547703743, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002264409465715289, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002264409465715289, "signal/frontier_coverage_15/centered_abs_mean": 0.1428930014371872, "signal/frontier_coverage_15/group_bin_occupancy": 0.8180555555555555, "signal/frontier_coverage_15/group_std_mean": 0.1900169789791107, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0017861625878140331, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0017861625878140331, "signal/frontier_coverage_20/centered_abs_mean": 0.06546353325247764, "signal/frontier_coverage_20/group_bin_occupancy": 0.8875, "signal/frontier_coverage_20/group_std_mean": 0.08640649169683456, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008182941586710512, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008182941586710512, "signal/frontier_coverage_25/centered_abs_mean": 0.08405493348836898, "signal/frontier_coverage_25/group_bin_occupancy": 0.9111111111111111, "signal/frontier_coverage_25/group_std_mean": 0.10799293369054794, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010506867663934826, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010506867663934826, "signal/frontier_coverage_5/centered_abs_mean": 0.18115276098251343, "signal/frontier_coverage_5/group_bin_occupancy": 0.8267361111111111, "signal/frontier_coverage_5/group_std_mean": 0.237866547703743, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002264409465715289, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002264409465715289, "signal/frontier_ece_reward/centered_abs_mean": 0.019202812016010283, "signal/frontier_ece_reward/group_bin_occupancy": 0.7010416666666667, "signal/frontier_ece_reward/group_std_mean": 0.02441619634628296, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0019202813040465117, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0019202813040465117, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32310463190078736, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7458333333333333, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3927301824092865, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03231046348810196, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03231046348810196, "step": 160 }, { "calibration/aurc": 0.15014929418501763, "calibration/batch_distribution_entropy": 0.9648822577428188, "calibration/batch_entropy_100bins": 0.9536192526711391, "calibration/batch_entropy_10bins": 0.9648822577428188, "calibration/batch_entropy_50bins": 0.9641713027389625, "calibration/batch_uniqueness": 0.9491115920985426, "calibration/buffer_distribution_entropy": 0.9799014261000714, "calibration/buffer_entropy_100bins": 0.9880403664396292, "calibration/buffer_entropy_10bins": 0.9799014261000714, "calibration/buffer_entropy_50bins": 0.9871609349048261, "calibration/confidence_entropy": 0.47602209908381665, "calibration/coverage@0%": 0.04914401400644559, "calibration/coverage@1%": 0.17886424796002687, "calibration/coverage@10%": 0.4479380671321317, "calibration/coverage@15%": 0.5937473627521253, "calibration/coverage@20%": 0.6905573444066685, "calibration/coverage@25%": 0.7322448470073357, "calibration/coverage@30%": 0.8138341728899103, "calibration/coverage@5%": 0.38675869118806394, "calibration/ece": 0.19224718889496667, "calibration/mean_confidence": 0.5294937305316039, "calibration/prompt_uniqueness": 0.8480453952249729, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015451388888888884, "completions/max_length": 3866.6, "completions/max_terminated_length": 3866.6, "completions/mean_length": 1037.01435546875, "completions/mean_terminated_length": 1053.4653564453124, "completions/min_length": 0.0, "completions/min_terminated_length": 347.4, "epoch": 0.39599505006187424, "grad_norm": 0.0003302933764643967, "learning_rate": 1.2951807228915664e-06, "loss": -0.011, "num_tokens": 429098283.0, "reward": 0.9782995223999024, "reward_std": 0.12833615243434907, "rewards/accuracy_reward": 0.6689236164093018, "rewards/brier_reward": 0.7952205181121826, "rewards/confidence_uniqueness_reward": 0.9354442358016968, "rewards/format_reward": 0.9842013835906982, "rewards/frontier_aurc_reward": -0.0011664081714116037, "rewards/frontier_coverage_0": 0.028777531534433364, "rewards/frontier_coverage_1": 0.028777531534433364, "rewards/frontier_coverage_10": 0.028777531534433364, "rewards/frontier_coverage_15": 0.03406037017703056, "rewards/frontier_coverage_20": 0.047785230726003644, "rewards/frontier_coverage_25": 0.11350053399801255, "rewards/frontier_coverage_5": 0.028777531534433364, "rewards/frontier_ece_reward": 0.0018203054147306829, "rewards/frontier_entropy_batch_reward": -0.2537760019302368, "signal/accuracy_reward/centered_abs_mean": 0.1449761286377907, "signal/accuracy_reward/group_bin_occupancy": 0.19791666666666666, "signal/accuracy_reward/group_std_mean": 0.19713898301124572, "signal/accuracy_reward/group_zero_std_frac": 0.4166666746139526, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07248806431889535, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07248806431889535, "signal/advantage_abs_mean": 0.0937883585691452, "signal/advantage_pre_scale_abs_mean": 0.0937883585691452, "signal/advantage_pre_scale_std": 0.15323749482631682, "signal/advantage_std": 0.15323749482631682, "signal/brier_reward/centered_abs_mean": 0.13555509746074676, "signal/brier_reward/group_bin_occupancy": 0.8256944444444445, "signal/brier_reward/group_std_mean": 0.1763432115316391, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013555509969592094, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013555509969592094, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.035801272839307785, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8256944444444445, "signal/confidence_uniqueness_reward/group_std_mean": 0.057362791150808334, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0035801273304969074, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0035801273304969074, "signal/format_reward/centered_abs_mean": 0.025390624813735486, "signal/format_reward/group_bin_occupancy": 0.1472222222222222, "signal/format_reward/group_std_mean": 0.0450144499540329, "signal/format_reward/group_zero_std_frac": 0.8222222447395324, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.012695312406867743, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.012695312406867743, "signal/frontier_aurc_reward/centered_abs_mean": 0.0013781745452433824, "signal/frontier_aurc_reward/group_bin_occupancy": 0.71875, "signal/frontier_aurc_reward/group_std_mean": 0.002441568742506206, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.7227181888301857e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.7227181888301857e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.19617225527763366, "signal/frontier_coverage_0/group_bin_occupancy": 0.8229166666666666, "signal/frontier_coverage_0/group_std_mean": 0.25922334790229795, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002452153339982033, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002452153339982033, "signal/frontier_coverage_1/centered_abs_mean": 0.19617225527763366, "signal/frontier_coverage_1/group_bin_occupancy": 0.8229166666666666, "signal/frontier_coverage_1/group_std_mean": 0.25922334790229795, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002452153339982033, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002452153339982033, "signal/frontier_coverage_10/centered_abs_mean": 0.19617225527763366, "signal/frontier_coverage_10/group_bin_occupancy": 0.8229166666666666, "signal/frontier_coverage_10/group_std_mean": 0.25922334790229795, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002452153339982033, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002452153339982033, "signal/frontier_coverage_15/centered_abs_mean": 0.13480380773544312, "signal/frontier_coverage_15/group_bin_occupancy": 0.8225694444444445, "signal/frontier_coverage_15/group_std_mean": 0.17995196878910064, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016850476153194905, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016850476153194905, "signal/frontier_coverage_20/centered_abs_mean": 0.06469424068927765, "signal/frontier_coverage_20/group_bin_occupancy": 0.9013888888888889, "signal/frontier_coverage_20/group_std_mean": 0.08368255645036697, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008086780086159706, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008086780086159706, "signal/frontier_coverage_25/centered_abs_mean": 0.08406549245119095, "signal/frontier_coverage_25/group_bin_occupancy": 0.8961805555555555, "signal/frontier_coverage_25/group_std_mean": 0.10824144333600998, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010508187115192413, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010508187115192413, "signal/frontier_coverage_5/centered_abs_mean": 0.19617225527763366, "signal/frontier_coverage_5/group_bin_occupancy": 0.8229166666666666, "signal/frontier_coverage_5/group_std_mean": 0.25922334790229795, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002452153339982033, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002452153339982033, "signal/frontier_ece_reward/centered_abs_mean": 0.019796424731612205, "signal/frontier_ece_reward/group_bin_occupancy": 0.6861111111111111, "signal/frontier_ece_reward/group_std_mean": 0.025375865027308465, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001979642570950091, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001979642570950091, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3119848847389221, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7420138888888889, "signal/frontier_entropy_batch_reward/group_std_mean": 0.38362287282943724, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.031198487058281897, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.031198487058281897, "step": 165 }, { "calibration/aurc": 0.10765388586400701, "calibration/batch_distribution_entropy": 0.9201008680078046, "calibration/batch_entropy_100bins": 0.9304532773423361, "calibration/batch_entropy_10bins": 0.9201008680078046, "calibration/batch_entropy_50bins": 0.9365333991429792, "calibration/batch_uniqueness": 0.9396124919073392, "calibration/buffer_distribution_entropy": 0.9826422244857502, "calibration/buffer_entropy_100bins": 0.9905602665567612, "calibration/buffer_entropy_10bins": 0.9826422244857502, "calibration/buffer_entropy_50bins": 0.9892516637227805, "calibration/confidence_entropy": 0.45945668982066856, "calibration/coverage@0%": 0.02905259854202034, "calibration/coverage@1%": 0.02905259854202034, "calibration/coverage@10%": 0.6277144528296503, "calibration/coverage@15%": 0.8055394667389475, "calibration/coverage@20%": 0.901696220950968, "calibration/coverage@25%": 0.9473329814400046, "calibration/coverage@30%": 0.9798907228554719, "calibration/coverage@5%": 0.18769134199751772, "calibration/ece": 0.11805985067322597, "calibration/mean_confidence": 0.6558299405476236, "calibration/prompt_uniqueness": 0.8448339007235921, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014409722222222232, "completions/max_length": 3814.6, "completions/max_terminated_length": 3814.6, "completions/mean_length": 956.7868041992188, "completions/mean_terminated_length": 970.8463134765625, "completions/min_length": 0.0, "completions/min_terminated_length": 320.4, "epoch": 0.4079949000637492, "grad_norm": 0.00033093104138970375, "learning_rate": 1.1445783132530121e-06, "loss": -0.0113, "num_tokens": 443209651.0, "reward": 0.9993075251579284, "reward_std": 0.13218997418880463, "rewards/accuracy_reward": 0.7213541626930237, "rewards/brier_reward": 0.8155832767486573, "rewards/confidence_uniqueness_reward": 0.9315274119377136, "rewards/format_reward": 0.9855034708976745, "rewards/frontier_aurc_reward": -0.0015495633939281105, "rewards/frontier_coverage_0": 0.006550856120884419, "rewards/frontier_coverage_1": 0.006550856120884419, "rewards/frontier_coverage_10": 0.006582287885248661, "rewards/frontier_coverage_15": 0.021447673067450525, "rewards/frontier_coverage_20": 0.06830336079001427, "rewards/frontier_coverage_25": 0.16457977890968323, "rewards/frontier_coverage_5": 0.006550856120884419, "rewards/frontier_ece_reward": 0.0008198617259040474, "rewards/frontier_entropy_batch_reward": -0.32402017116546633, "signal/accuracy_reward/centered_abs_mean": 0.151953125, "signal/accuracy_reward/group_bin_occupancy": 0.19895833333333332, "signal/accuracy_reward/group_std_mean": 0.2033730238676071, "signal/accuracy_reward/group_zero_std_frac": 0.4083333373069763, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0759765625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0759765625, "signal/advantage_abs_mean": 0.09784262776374816, "signal/advantage_pre_scale_abs_mean": 0.09784262776374816, "signal/advantage_pre_scale_std": 0.1593571901321411, "signal/advantage_std": 0.1593571901321411, "signal/brier_reward/centered_abs_mean": 0.13310655802488328, "signal/brier_reward/group_bin_occupancy": 0.8138888888888889, "signal/brier_reward/group_std_mean": 0.1739350289106369, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013310655951499939, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013310655951499939, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03614392466843128, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8378472222222222, "signal/confidence_uniqueness_reward/group_std_mean": 0.05578758716583252, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0036143924575299025, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0036143924575299025, "signal/format_reward/centered_abs_mean": 0.02267252579331398, "signal/format_reward/group_bin_occupancy": 0.14444444444444443, "signal/format_reward/group_std_mean": 0.03960092887282372, "signal/format_reward/group_zero_std_frac": 0.8444444417953492, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01133626289665699, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01133626289665699, "signal/frontier_aurc_reward/centered_abs_mean": 0.0021186517318710686, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6819444444444445, "signal/frontier_aurc_reward/group_std_mean": 0.003828370710834861, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.648314693942666e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.648314693942666e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.1690923511981964, "signal/frontier_coverage_0/group_bin_occupancy": 0.8170138888888889, "signal/frontier_coverage_0/group_std_mean": 0.22359244525432587, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0021136544179171323, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021136544179171323, "signal/frontier_coverage_1/centered_abs_mean": 0.1690923511981964, "signal/frontier_coverage_1/group_bin_occupancy": 0.8170138888888889, "signal/frontier_coverage_1/group_std_mean": 0.22359244525432587, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0021136544179171323, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021136544179171323, "signal/frontier_coverage_10/centered_abs_mean": 0.1690543532371521, "signal/frontier_coverage_10/group_bin_occupancy": 0.8170138888888889, "signal/frontier_coverage_10/group_std_mean": 0.2235435426235199, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0021131794434040784, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0021131794434040784, "signal/frontier_coverage_15/centered_abs_mean": 0.10032611638307572, "signal/frontier_coverage_15/group_bin_occupancy": 0.8232638888888889, "signal/frontier_coverage_15/group_std_mean": 0.13506484925746917, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0012540765106678008, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012540765106678008, "signal/frontier_coverage_20/centered_abs_mean": 0.06498248800635338, "signal/frontier_coverage_20/group_bin_occupancy": 0.9329861111111111, "signal/frontier_coverage_20/group_std_mean": 0.08248871117830277, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008122811093926429, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008122811093926429, "signal/frontier_coverage_25/centered_abs_mean": 0.10937306880950928, "signal/frontier_coverage_25/group_bin_occupancy": 0.8958333333333334, "signal/frontier_coverage_25/group_std_mean": 0.14055634438991546, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013671633554622532, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013671633554622532, "signal/frontier_coverage_5/centered_abs_mean": 0.1690923511981964, "signal/frontier_coverage_5/group_bin_occupancy": 0.8170138888888889, "signal/frontier_coverage_5/group_std_mean": 0.22359244525432587, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0021136544179171323, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021136544179171323, "signal/frontier_ece_reward/centered_abs_mean": 0.017915211990475653, "signal/frontier_ece_reward/group_bin_occupancy": 0.7027777777777777, "signal/frontier_ece_reward/group_std_mean": 0.02288214974105358, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0017915211617946624, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0017915211617946624, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3317939579486847, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.40215290188789365, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03317939639091492, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03317939639091492, "step": 170 }, { "calibration/aurc": 0.12350982471083516, "calibration/batch_distribution_entropy": 0.9672992669923769, "calibration/batch_entropy_100bins": 0.9545196973741442, "calibration/batch_entropy_10bins": 0.9672992669923769, "calibration/batch_entropy_50bins": 0.9654162654792131, "calibration/batch_uniqueness": 0.9499382833178359, "calibration/buffer_distribution_entropy": 0.982073392424633, "calibration/buffer_entropy_100bins": 0.9903093056557356, "calibration/buffer_entropy_10bins": 0.982073392424633, "calibration/buffer_entropy_50bins": 0.9888724223724161, "calibration/confidence_entropy": 0.5193928635458319, "calibration/coverage@0%": 0.06705871595159879, "calibration/coverage@1%": 0.06705871595159879, "calibration/coverage@10%": 0.45455122392336095, "calibration/coverage@15%": 0.6717985421009731, "calibration/coverage@20%": 0.8306836402154552, "calibration/coverage@25%": 0.9227183013732783, "calibration/coverage@30%": 0.9658543341712893, "calibration/coverage@5%": 0.2820532838090668, "calibration/ece": 0.16841145912524702, "calibration/mean_confidence": 0.5635972691055217, "calibration/prompt_uniqueness": 0.8650919759662437, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01684027777777779, "completions/max_length": 3941.2, "completions/max_terminated_length": 3941.2, "completions/mean_length": 955.89921875, "completions/mean_terminated_length": 972.2580932617187, "completions/min_length": 0.0, "completions/min_terminated_length": 300.0, "epoch": 0.4199947500656242, "grad_norm": 0.00031754354131408036, "learning_rate": 9.93975903614458e-07, "loss": -0.014, "num_tokens": 457329578.0, "reward": 0.9924328446388244, "reward_std": 0.13685493767261506, "rewards/accuracy_reward": 0.7052083373069763, "rewards/brier_reward": 0.8050451636314392, "rewards/confidence_uniqueness_reward": 0.9324544668197632, "rewards/format_reward": 0.9828993082046509, "rewards/frontier_aurc_reward": -0.0010705198394134641, "rewards/frontier_coverage_0": 0.00536943394690752, "rewards/frontier_coverage_1": 0.00536943394690752, "rewards/frontier_coverage_10": 0.0054389465600252155, "rewards/frontier_coverage_15": 0.022105094417929648, "rewards/frontier_coverage_20": 0.06661936640739441, "rewards/frontier_coverage_25": 0.15077317059040068, "rewards/frontier_coverage_5": 0.00536943394690752, "rewards/frontier_ece_reward": -0.0005749327523517422, "rewards/frontier_entropy_batch_reward": -0.28563171029090884, "signal/accuracy_reward/centered_abs_mean": 0.15720486342906953, "signal/accuracy_reward/group_bin_occupancy": 0.20069444444444443, "signal/accuracy_reward/group_std_mean": 0.21156745851039888, "signal/accuracy_reward/group_zero_std_frac": 0.3944444417953491, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07860243171453477, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07860243171453477, "signal/advantage_abs_mean": 0.10034923404455184, "signal/advantage_pre_scale_abs_mean": 0.10034923404455184, "signal/advantage_pre_scale_std": 0.16388348042964934, "signal/advantage_std": 0.16388348042964934, "signal/brier_reward/centered_abs_mean": 0.1324082151055336, "signal/brier_reward/group_bin_occupancy": 0.842013888888889, "signal/brier_reward/group_std_mean": 0.17141578793525697, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013240821473300458, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013240821473300458, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03817100264132023, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8347222222222221, "signal/confidence_uniqueness_reward/group_std_mean": 0.059024860709905626, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003817100077867508, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003817100077867508, "signal/format_reward/centered_abs_mean": 0.02693684846162796, "signal/format_reward/group_bin_occupancy": 0.14618055555555556, "signal/format_reward/group_std_mean": 0.04554474353790283, "signal/format_reward/group_zero_std_frac": 0.8305555582046509, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01346842423081398, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01346842423081398, "signal/frontier_aurc_reward/centered_abs_mean": 0.001381588843651116, "signal/frontier_aurc_reward/group_bin_occupancy": 0.7100694444444444, "signal/frontier_aurc_reward/group_std_mean": 0.002594554144889116, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.7269860109081492e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.7269860109081492e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.18175126016139984, "signal/frontier_coverage_0/group_bin_occupancy": 0.8336805555555555, "signal/frontier_coverage_0/group_std_mean": 0.24015596210956575, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002271890779957175, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002271890779957175, "signal/frontier_coverage_1/centered_abs_mean": 0.18175126016139984, "signal/frontier_coverage_1/group_bin_occupancy": 0.8336805555555555, "signal/frontier_coverage_1/group_std_mean": 0.24015596210956575, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002271890779957175, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002271890779957175, "signal/frontier_coverage_10/centered_abs_mean": 0.18154462277889252, "signal/frontier_coverage_10/group_bin_occupancy": 0.8340277777777778, "signal/frontier_coverage_10/group_std_mean": 0.2398768812417984, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022693077102303506, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022693077102303506, "signal/frontier_coverage_15/centered_abs_mean": 0.09890482127666474, "signal/frontier_coverage_15/group_bin_occupancy": 0.8482638888888889, "signal/frontier_coverage_15/group_std_mean": 0.13202964663505554, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0012363103218376637, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012363103218376637, "signal/frontier_coverage_20/centered_abs_mean": 0.06385754197835922, "signal/frontier_coverage_20/group_bin_occupancy": 0.9204861111111111, "signal/frontier_coverage_20/group_std_mean": 0.08191778510808945, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0007982192793861032, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0007982192793861032, "signal/frontier_coverage_25/centered_abs_mean": 0.10205547660589218, "signal/frontier_coverage_25/group_bin_occupancy": 0.909375, "signal/frontier_coverage_25/group_std_mean": 0.13224542140960693, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012756934389472007, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012756934389472007, "signal/frontier_coverage_5/centered_abs_mean": 0.18175126016139984, "signal/frontier_coverage_5/group_bin_occupancy": 0.8336805555555555, "signal/frontier_coverage_5/group_std_mean": 0.24015596210956575, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002271890779957175, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002271890779957175, "signal/frontier_ece_reward/centered_abs_mean": 0.01852937713265419, "signal/frontier_ece_reward/group_bin_occupancy": 0.7, "signal/frontier_ece_reward/group_std_mean": 0.023432932049036025, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0018529377412050962, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0018529377412050962, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33074782490730287, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7576388888888889, "signal/frontier_entropy_batch_reward/group_std_mean": 0.40171239376068113, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03307478278875351, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03307478278875351, "step": 175 }, { "calibration/aurc": 0.09333554035618889, "calibration/batch_distribution_entropy": 0.9642334917542463, "calibration/batch_entropy_100bins": 0.9496345841886905, "calibration/batch_entropy_10bins": 0.9642334917542463, "calibration/batch_entropy_50bins": 0.9616782721713711, "calibration/batch_uniqueness": 0.9488292532918384, "calibration/buffer_distribution_entropy": 0.9812171950001629, "calibration/buffer_entropy_100bins": 0.9899411271146056, "calibration/buffer_entropy_10bins": 0.9812171950001629, "calibration/buffer_entropy_50bins": 0.9884013265090363, "calibration/confidence_entropy": 0.5117064412428437, "calibration/coverage@0%": 0.11779823844610977, "calibration/coverage@1%": 0.12516665949874134, "calibration/coverage@10%": 0.6127230239560119, "calibration/coverage@15%": 0.8097812733762957, "calibration/coverage@20%": 0.8916710300405335, "calibration/coverage@25%": 0.9569934360730594, "calibration/coverage@30%": 0.9875, "calibration/coverage@5%": 0.3797779002231238, "calibration/ece": 0.17150899211730591, "calibration/mean_confidence": 0.591707829096662, "calibration/prompt_uniqueness": 0.8579332667838413, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014149305555555537, "completions/max_length": 3917.2, "completions/max_terminated_length": 3917.2, "completions/mean_length": 926.2488037109375, "completions/mean_terminated_length": 939.5196899414062, "completions/min_length": 0.0, "completions/min_terminated_length": 312.8, "epoch": 0.4319946000674992, "grad_norm": 0.000292018405161798, "learning_rate": 8.433734939759036e-07, "loss": -0.011, "num_tokens": 471099932.0, "reward": 0.9934727311134338, "reward_std": 0.13324700593948363, "rewards/accuracy_reward": 0.704600703716278, "rewards/brier_reward": 0.7962529778480529, "rewards/confidence_uniqueness_reward": 0.936048150062561, "rewards/format_reward": 0.9856770753860473, "rewards/frontier_aurc_reward": -0.0013238670071586967, "rewards/frontier_coverage_0": -0.0011739198584109546, "rewards/frontier_coverage_1": -0.0011739198584109546, "rewards/frontier_coverage_10": -0.001000142702832818, "rewards/frontier_coverage_15": 0.020229480788111688, "rewards/frontier_coverage_20": 0.06501827016472816, "rewards/frontier_coverage_25": 0.14618382453918458, "rewards/frontier_coverage_5": -0.0011739198584109546, "rewards/frontier_ece_reward": -0.0018186770612373948, "rewards/frontier_entropy_batch_reward": -0.2753425925970078, "signal/accuracy_reward/centered_abs_mean": 0.15680880844593048, "signal/accuracy_reward/group_bin_occupancy": 0.1965277777777778, "signal/accuracy_reward/group_std_mean": 0.20510988235473632, "signal/accuracy_reward/group_zero_std_frac": 0.4277777850627899, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07840440422296524, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07840440422296524, "signal/advantage_abs_mean": 0.09849014431238175, "signal/advantage_pre_scale_abs_mean": 0.09849014431238175, "signal/advantage_pre_scale_std": 0.15953330397605897, "signal/advantage_std": 0.15953330397605897, "signal/brier_reward/centered_abs_mean": 0.13330689668655396, "signal/brier_reward/group_bin_occupancy": 0.8267361111111111, "signal/brier_reward/group_std_mean": 0.17314621210098266, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013330690003931523, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013330690003931523, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03478739969432354, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8302083333333334, "signal/confidence_uniqueness_reward/group_std_mean": 0.05585672035813331, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0034787400159984826, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0034787400159984826, "signal/format_reward/centered_abs_mean": 0.02369249127805233, "signal/format_reward/group_bin_occupancy": 0.14652777777777776, "signal/format_reward/group_std_mean": 0.042657271027565, "signal/format_reward/group_zero_std_frac": 0.8277777791023254, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.011846245639026165, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.011846245639026165, "signal/frontier_aurc_reward/centered_abs_mean": 0.0014823697507381438, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6843750000000001, "signal/frontier_aurc_reward/group_std_mean": 0.002641508309170604, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.8529622684582138e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.8529622684582138e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.18787881731987, "signal/frontier_coverage_0/group_bin_occupancy": 0.8378472222222222, "signal/frontier_coverage_0/group_std_mean": 0.24395534098148347, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0023484852630645038, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023484852630645038, "signal/frontier_coverage_1/centered_abs_mean": 0.18787881731987, "signal/frontier_coverage_1/group_bin_occupancy": 0.8378472222222222, "signal/frontier_coverage_1/group_std_mean": 0.24395534098148347, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023484852630645038, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023484852630645038, "signal/frontier_coverage_10/centered_abs_mean": 0.18747189342975618, "signal/frontier_coverage_10/group_bin_occupancy": 0.8371527777777779, "signal/frontier_coverage_10/group_std_mean": 0.24344970285892487, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023433986585587262, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023433986585587262, "signal/frontier_coverage_15/centered_abs_mean": 0.08262652903795242, "signal/frontier_coverage_15/group_bin_occupancy": 0.8614583333333332, "signal/frontier_coverage_15/group_std_mean": 0.10986567437648773, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0010328316362574696, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010328316362574696, "signal/frontier_coverage_20/centered_abs_mean": 0.06542501747608184, "signal/frontier_coverage_20/group_bin_occupancy": 0.9225694444444444, "signal/frontier_coverage_20/group_std_mean": 0.08352421820163727, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008178127114661038, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008178127114661038, "signal/frontier_coverage_25/centered_abs_mean": 0.10522469878196716, "signal/frontier_coverage_25/group_bin_occupancy": 0.9090277777777779, "signal/frontier_coverage_25/group_std_mean": 0.13572756350040435, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013153087813407183, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013153087813407183, "signal/frontier_coverage_5/centered_abs_mean": 0.18787881731987, "signal/frontier_coverage_5/group_bin_occupancy": 0.8378472222222222, "signal/frontier_coverage_5/group_std_mean": 0.24395534098148347, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023484852630645038, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023484852630645038, "signal/frontier_ece_reward/centered_abs_mean": 0.018781586736440658, "signal/frontier_ece_reward/group_bin_occupancy": 0.6805555555555556, "signal/frontier_ece_reward/group_std_mean": 0.02334692105650902, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0018781586550176144, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0018781586550176144, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3329127550125122, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7572916666666667, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4022037506103516, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03329127728939056, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03329127728939056, "step": 180 }, { "calibration/aurc": 0.17310808674125525, "calibration/batch_distribution_entropy": 0.9739914521413097, "calibration/batch_entropy_100bins": 0.9587801895009787, "calibration/batch_entropy_10bins": 0.9739914521413097, "calibration/batch_entropy_50bins": 0.9714626911723826, "calibration/batch_uniqueness": 0.9516766269618417, "calibration/buffer_distribution_entropy": 0.9820326552070876, "calibration/buffer_entropy_100bins": 0.9903904724796785, "calibration/buffer_entropy_10bins": 0.9820326552070876, "calibration/buffer_entropy_50bins": 0.9889127545565767, "calibration/confidence_entropy": 0.5184269578648062, "calibration/coverage@0%": 0.009482740614904756, "calibration/coverage@1%": 0.009482740614904756, "calibration/coverage@10%": 0.22542069316029859, "calibration/coverage@15%": 0.4309821279380639, "calibration/coverage@20%": 0.8618492692076174, "calibration/coverage@25%": 0.9274268617021277, "calibration/coverage@30%": 0.9595744680851064, "calibration/coverage@5%": 0.035237447087000114, "calibration/ece": 0.22277728809670955, "calibration/mean_confidence": 0.5523523413300608, "calibration/prompt_uniqueness": 0.862558140679127, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014149305555555537, "completions/max_length": 3864.0, "completions/max_terminated_length": 3864.0, "completions/mean_length": 923.6966186523438, "completions/mean_terminated_length": 937.0326904296875, "completions/min_length": 0.0, "completions/min_terminated_length": 282.8, "epoch": 0.44399445006937416, "grad_norm": 0.00034543531364761293, "learning_rate": 6.927710843373495e-07, "loss": -0.0101, "num_tokens": 484830965.0, "reward": 0.984534227848053, "reward_std": 0.14124380350112914, "rewards/accuracy_reward": 0.6821180582046509, "rewards/brier_reward": 0.7921573638916015, "rewards/confidence_uniqueness_reward": 0.9371705174446106, "rewards/format_reward": 0.9855902671813965, "rewards/frontier_aurc_reward": -0.001408666034694761, "rewards/frontier_coverage_0": 0.009120326023548841, "rewards/frontier_coverage_1": 0.009120326023548841, "rewards/frontier_coverage_10": 0.009218692220747471, "rewards/frontier_coverage_15": 0.028891825303435325, "rewards/frontier_coverage_20": 0.06944562941789627, "rewards/frontier_coverage_25": 0.14148974865674974, "rewards/frontier_coverage_5": 0.009120326023548841, "rewards/frontier_ece_reward": -0.00228926861891523, "rewards/frontier_entropy_batch_reward": -0.25461295545101165, "signal/accuracy_reward/centered_abs_mean": 0.17507595419883729, "signal/accuracy_reward/group_bin_occupancy": 0.20277777777777778, "signal/accuracy_reward/group_std_mean": 0.22589021325111389, "signal/accuracy_reward/group_zero_std_frac": 0.37777777314186095, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08753797709941864, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08753797709941864, "signal/advantage_abs_mean": 0.10619968473911286, "signal/advantage_pre_scale_abs_mean": 0.10619968473911286, "signal/advantage_pre_scale_std": 0.16439965069293977, "signal/advantage_std": 0.16439965069293977, "signal/brier_reward/centered_abs_mean": 0.14218833446502685, "signal/brier_reward/group_bin_occupancy": 0.8506944444444443, "signal/brier_reward/group_std_mean": 0.18213841021060945, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014218833483755588, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014218833483755588, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.034088420867919925, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8260416666666666, "signal/confidence_uniqueness_reward/group_std_mean": 0.05641009286046028, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003408842021599412, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003408842021599412, "signal/format_reward/centered_abs_mean": 0.02331814244389534, "signal/format_reward/group_bin_occupancy": 0.14756944444444445, "signal/format_reward/group_std_mean": 0.043689073622226716, "signal/format_reward/group_zero_std_frac": 0.819444453716278, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01165907122194767, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01165907122194767, "signal/frontier_aurc_reward/centered_abs_mean": 0.0017288225702941419, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6871527777777777, "signal/frontier_aurc_reward/group_std_mean": 0.003313097590580583, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.161028405680554e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.161028405680554e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.20681215226650237, "signal/frontier_coverage_0/group_bin_occupancy": 0.8489583333333334, "signal/frontier_coverage_0/group_std_mean": 0.2656663477420807, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0025851519778370856, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0025851519778370856, "signal/frontier_coverage_1/centered_abs_mean": 0.20681215226650237, "signal/frontier_coverage_1/group_bin_occupancy": 0.8489583333333334, "signal/frontier_coverage_1/group_std_mean": 0.2656663477420807, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0025851519778370856, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025851519778370856, "signal/frontier_coverage_10/centered_abs_mean": 0.20644972324371338, "signal/frontier_coverage_10/group_bin_occupancy": 0.8489583333333334, "signal/frontier_coverage_10/group_std_mean": 0.2652065873146057, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0025806216057389975, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025806216057389975, "signal/frontier_coverage_15/centered_abs_mean": 0.07617418020963669, "signal/frontier_coverage_15/group_bin_occupancy": 0.8944444444444445, "signal/frontier_coverage_15/group_std_mean": 0.0993105873465538, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0009521772735752165, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009521772735752165, "signal/frontier_coverage_20/centered_abs_mean": 0.06728554219007492, "signal/frontier_coverage_20/group_bin_occupancy": 0.9090277777777779, "signal/frontier_coverage_20/group_std_mean": 0.08726846128702163, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008410692913457751, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008410692913457751, "signal/frontier_coverage_25/centered_abs_mean": 0.10936762541532516, "signal/frontier_coverage_25/group_bin_occupancy": 0.8864583333333333, "signal/frontier_coverage_25/group_std_mean": 0.143340665102005, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013670953223481775, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013670953223481775, "signal/frontier_coverage_5/centered_abs_mean": 0.20681215226650237, "signal/frontier_coverage_5/group_bin_occupancy": 0.8489583333333334, "signal/frontier_coverage_5/group_std_mean": 0.2656663477420807, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025851519778370856, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025851519778370856, "signal/frontier_ece_reward/centered_abs_mean": 0.019747552648186683, "signal/frontier_ece_reward/group_bin_occupancy": 0.671875, "signal/frontier_ece_reward/group_std_mean": 0.024326668307185172, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0019747552461922167, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0019747552461922167, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3227647304534912, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7583333333333332, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3943642437458038, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03227647431194782, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03227647431194782, "step": 185 }, { "calibration/aurc": 0.1515759443307232, "calibration/batch_distribution_entropy": 0.9423158571273464, "calibration/batch_entropy_100bins": 0.9422109701984105, "calibration/batch_entropy_10bins": 0.9423158571273464, "calibration/batch_entropy_50bins": 0.9492567887186854, "calibration/batch_uniqueness": 0.9448453349588843, "calibration/buffer_distribution_entropy": 0.9824175333911814, "calibration/buffer_entropy_100bins": 0.9906260527790651, "calibration/buffer_entropy_10bins": 0.9824175333911814, "calibration/buffer_entropy_50bins": 0.9891563716955704, "calibration/confidence_entropy": 0.4806348039531195, "calibration/coverage@0%": 0.05204903964451134, "calibration/coverage@1%": 0.05204903964451134, "calibration/coverage@10%": 0.40964392226487145, "calibration/coverage@15%": 0.48947900867296956, "calibration/coverage@20%": 0.8313676437985501, "calibration/coverage@25%": 0.9193766286216984, "calibration/coverage@30%": 0.9560233821947192, "calibration/coverage@5%": 0.18513113847849771, "calibration/ece": 0.15951463187555487, "calibration/mean_confidence": 0.6206274668368617, "calibration/prompt_uniqueness": 0.8566573182834241, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010850694444444465, "completions/max_length": 3679.0, "completions/max_terminated_length": 3679.0, "completions/mean_length": 895.5675415039062, "completions/mean_terminated_length": 905.3911865234375, "completions/min_length": 0.0, "completions/min_terminated_length": 297.8, "epoch": 0.45599430007124914, "grad_norm": 0.0003213706368114799, "learning_rate": 5.421686746987952e-07, "loss": -0.0095, "num_tokens": 498230847.0, "reward": 1.0024492621421814, "reward_std": 0.13486847281455994, "rewards/accuracy_reward": 0.7212673664093018, "rewards/brier_reward": 0.8124405860900878, "rewards/confidence_uniqueness_reward": 0.9361489892005921, "rewards/format_reward": 0.989149296283722, "rewards/frontier_aurc_reward": -0.0017552036792039872, "rewards/frontier_coverage_0": 0.004374879878014326, "rewards/frontier_coverage_1": 0.004374879878014326, "rewards/frontier_coverage_10": 0.005093986354768276, "rewards/frontier_coverage_15": 0.038227176293730736, "rewards/frontier_coverage_20": 0.10161207318305969, "rewards/frontier_coverage_25": 0.19290560781955718, "rewards/frontier_coverage_5": 0.004374879878014326, "rewards/frontier_ece_reward": -0.002311352139804512, "rewards/frontier_entropy_batch_reward": -0.3175202667713165, "signal/accuracy_reward/centered_abs_mean": 0.1665961414575577, "signal/accuracy_reward/group_bin_occupancy": 0.20451388888888888, "signal/accuracy_reward/group_std_mean": 0.22064870595932007, "signal/accuracy_reward/group_zero_std_frac": 0.3638888895511627, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08329807072877884, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08329807072877884, "signal/advantage_abs_mean": 0.09963465481996536, "signal/advantage_pre_scale_abs_mean": 0.09963465481996536, "signal/advantage_pre_scale_std": 0.1596238434314728, "signal/advantage_std": 0.1596238434314728, "signal/brier_reward/centered_abs_mean": 0.13139403611421585, "signal/brier_reward/group_bin_occupancy": 0.8072916666666666, "signal/brier_reward/group_std_mean": 0.1725013792514801, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013139403238892556, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013139403238892556, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03191892094910145, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8465277777777779, "signal/confidence_uniqueness_reward/group_std_mean": 0.05261510461568832, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003191892057657242, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003191892057657242, "signal/format_reward/centered_abs_mean": 0.01899414099752903, "signal/format_reward/group_bin_occupancy": 0.14444444444444446, "signal/format_reward/group_std_mean": 0.03723305016756058, "signal/format_reward/group_zero_std_frac": 0.8444444417953492, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.009497070498764516, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.009497070498764516, "signal/frontier_aurc_reward/centered_abs_mean": 0.002246159012429416, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6746527777777779, "signal/frontier_aurc_reward/group_std_mean": 0.004121129959821701, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.80769876553677e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.80769876553677e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.1798312783241272, "signal/frontier_coverage_0/group_bin_occupancy": 0.8173611111111111, "signal/frontier_coverage_0/group_std_mean": 0.2366260141134262, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0022478910628706216, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022478910628706216, "signal/frontier_coverage_1/centered_abs_mean": 0.1798312783241272, "signal/frontier_coverage_1/group_bin_occupancy": 0.8173611111111111, "signal/frontier_coverage_1/group_std_mean": 0.2366260141134262, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022478910628706216, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022478910628706216, "signal/frontier_coverage_10/centered_abs_mean": 0.1779997855424881, "signal/frontier_coverage_10/group_bin_occupancy": 0.815625, "signal/frontier_coverage_10/group_std_mean": 0.23430375754833221, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002224997291341424, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002224997291341424, "signal/frontier_coverage_15/centered_abs_mean": 0.06649869680404663, "signal/frontier_coverage_15/group_bin_occupancy": 0.903125, "signal/frontier_coverage_15/group_std_mean": 0.08655229657888412, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0008312337566167116, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0008312337566167116, "signal/frontier_coverage_20/centered_abs_mean": 0.07941063195466995, "signal/frontier_coverage_20/group_bin_occupancy": 0.9229166666666668, "signal/frontier_coverage_20/group_std_mean": 0.10138371139764786, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009926329017616808, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009926329017616808, "signal/frontier_coverage_25/centered_abs_mean": 0.12681428492069244, "signal/frontier_coverage_25/group_bin_occupancy": 0.8920138888888889, "signal/frontier_coverage_25/group_std_mean": 0.16400834619998933, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015851786360144616, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015851786360144616, "signal/frontier_coverage_5/centered_abs_mean": 0.1798312783241272, "signal/frontier_coverage_5/group_bin_occupancy": 0.8173611111111111, "signal/frontier_coverage_5/group_std_mean": 0.2366260141134262, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0022478910628706216, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022478910628706216, "signal/frontier_ece_reward/centered_abs_mean": 0.017472782731056215, "signal/frontier_ece_reward/group_bin_occupancy": 0.6562499999999999, "signal/frontier_ece_reward/group_std_mean": 0.021824596077203752, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0017472783569246531, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0017472783569246531, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.34197773933410647, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.75, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4096067249774933, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.034197773039340976, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.034197773039340976, "step": 190 }, { "calibration/aurc": 0.18726056542504158, "calibration/batch_distribution_entropy": 0.973440015714076, "calibration/batch_entropy_100bins": 0.9598008556478849, "calibration/batch_entropy_10bins": 0.973440015714076, "calibration/batch_entropy_50bins": 0.9692972792520752, "calibration/batch_uniqueness": 0.9511735907416039, "calibration/buffer_distribution_entropy": 0.9823656842730131, "calibration/buffer_entropy_100bins": 0.9906323413760862, "calibration/buffer_entropy_10bins": 0.9823656842730131, "calibration/buffer_entropy_50bins": 0.9891423825566555, "calibration/confidence_entropy": 0.47507839773875, "calibration/coverage@0%": 0.03543482683308126, "calibration/coverage@1%": 0.03543482683308126, "calibration/coverage@10%": 0.38248000231358287, "calibration/coverage@15%": 0.459137786105719, "calibration/coverage@20%": 0.5653955251403547, "calibration/coverage@25%": 0.6885020825219144, "calibration/coverage@30%": 0.8656397790055248, "calibration/coverage@5%": 0.11277843696936363, "calibration/ece": 0.18294151824470856, "calibration/mean_confidence": 0.5592223827685562, "calibration/prompt_uniqueness": 0.8476112297769705, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015451388888888884, "completions/max_length": 3607.2, "completions/max_terminated_length": 3607.2, "completions/mean_length": 901.8653686523437, "completions/mean_terminated_length": 916.1808471679688, "completions/min_length": 0.0, "completions/min_terminated_length": 269.8, "epoch": 0.46799415007312406, "grad_norm": 0.00037715397775173187, "learning_rate": 3.91566265060241e-07, "loss": -0.0117, "num_tokens": 511701200.0, "reward": 0.9781022071838379, "reward_std": 0.13412580341100694, "rewards/accuracy_reward": 0.6714409708976745, "rewards/brier_reward": 0.7976385831832886, "rewards/confidence_uniqueness_reward": 0.9342209815979003, "rewards/format_reward": 0.9845486044883728, "rewards/frontier_aurc_reward": -0.00212171315215528, "rewards/frontier_coverage_0": 0.0240963838994503, "rewards/frontier_coverage_1": 0.0240963838994503, "rewards/frontier_coverage_10": 0.026926378719508648, "rewards/frontier_coverage_15": 0.04270496740937233, "rewards/frontier_coverage_20": 0.09552509933710099, "rewards/frontier_coverage_25": 0.17143839299678804, "rewards/frontier_coverage_5": 0.0240984745323658, "rewards/frontier_ece_reward": -0.0005812996299937367, "rewards/frontier_entropy_batch_reward": -0.28104971945285795, "signal/accuracy_reward/centered_abs_mean": 0.15088432729244233, "signal/accuracy_reward/group_bin_occupancy": 0.19965277777777776, "signal/accuracy_reward/group_std_mean": 0.2036748230457306, "signal/accuracy_reward/group_zero_std_frac": 0.40277777910232543, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07544216364622117, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07544216364622117, "signal/advantage_abs_mean": 0.09861928075551987, "signal/advantage_pre_scale_abs_mean": 0.09861928075551987, "signal/advantage_pre_scale_std": 0.1607095330953598, "signal/advantage_std": 0.1607095330953598, "signal/brier_reward/centered_abs_mean": 0.1398274078965187, "signal/brier_reward/group_bin_occupancy": 0.821875, "signal/brier_reward/group_std_mean": 0.18089538514614106, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01398274227976799, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01398274227976799, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03514176532626152, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8291666666666666, "signal/confidence_uniqueness_reward/group_std_mean": 0.05587729141116142, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003514176746830344, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003514176746830344, "signal/format_reward/centered_abs_mean": 0.023600259982049464, "signal/format_reward/group_bin_occupancy": 0.14583333333333334, "signal/format_reward/group_std_mean": 0.04200470522046089, "signal/format_reward/group_zero_std_frac": 0.8333333373069763, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.011800129991024732, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.011800129991024732, "signal/frontier_aurc_reward/centered_abs_mean": 0.0025537875946611164, "signal/frontier_aurc_reward/group_bin_occupancy": 0.675, "signal/frontier_aurc_reward/group_std_mean": 0.004770330665633082, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.192234580637887e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.192234580637887e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.17494458258152007, "signal/frontier_coverage_0/group_bin_occupancy": 0.8194444444444444, "signal/frontier_coverage_0/group_std_mean": 0.23222445845603942, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002186807314865291, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002186807314865291, "signal/frontier_coverage_1/centered_abs_mean": 0.17494458258152007, "signal/frontier_coverage_1/group_bin_occupancy": 0.8194444444444444, "signal/frontier_coverage_1/group_std_mean": 0.23222445845603942, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002186807314865291, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002186807314865291, "signal/frontier_coverage_10/centered_abs_mean": 0.1601300060749054, "signal/frontier_coverage_10/group_bin_occupancy": 0.810763888888889, "signal/frontier_coverage_10/group_std_mean": 0.2132750004529953, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0020016250899061562, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020016250899061562, "signal/frontier_coverage_15/centered_abs_mean": 0.06419163271784782, "signal/frontier_coverage_15/group_bin_occupancy": 0.9180555555555555, "signal/frontier_coverage_15/group_std_mean": 0.08303456008434296, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0008023954229429364, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0008023954229429364, "signal/frontier_coverage_20/centered_abs_mean": 0.08162481337785721, "signal/frontier_coverage_20/group_bin_occupancy": 0.9072916666666666, "signal/frontier_coverage_20/group_std_mean": 0.10481662452220916, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010203101439401508, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010203101439401508, "signal/frontier_coverage_25/centered_abs_mean": 0.12805333733558655, "signal/frontier_coverage_25/group_bin_occupancy": 0.8833333333333332, "signal/frontier_coverage_25/group_std_mean": 0.16557440161705017, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0016006667632609607, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016006667632609607, "signal/frontier_coverage_5/centered_abs_mean": 0.17488285303115844, "signal/frontier_coverage_5/group_bin_occupancy": 0.8194444444444444, "signal/frontier_coverage_5/group_std_mean": 0.23214463293552398, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0021860357141122223, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021860357141122223, "signal/frontier_ece_reward/centered_abs_mean": 0.01619000006467104, "signal/frontier_ece_reward/group_bin_occupancy": 0.6190972222222222, "signal/frontier_ece_reward/group_std_mean": 0.020337154716253282, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0016189999878406525, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0016189999878406525, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32077054381370546, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7475694444444445, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3914342522621155, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0320770550519228, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0320770550519228, "step": 195 }, { "calibration/aurc": 0.1413520988930143, "calibration/batch_distribution_entropy": 0.9650686192766728, "calibration/batch_entropy_100bins": 0.9538989692481424, "calibration/batch_entropy_10bins": 0.9650686192766728, "calibration/batch_entropy_50bins": 0.9638523751233203, "calibration/batch_uniqueness": 0.9494360647400925, "calibration/buffer_distribution_entropy": 0.982301321487669, "calibration/buffer_entropy_100bins": 0.9905973370960849, "calibration/buffer_entropy_10bins": 0.982301321487669, "calibration/buffer_entropy_50bins": 0.9891011438371244, "calibration/confidence_entropy": 0.5122134728583332, "calibration/coverage@0%": 0.04735598198587883, "calibration/coverage@1%": 0.04735598198587883, "calibration/coverage@10%": 0.4334796882439303, "calibration/coverage@15%": 0.5858704996990151, "calibration/coverage@20%": 0.8214784767983201, "calibration/coverage@25%": 0.9256523961484797, "calibration/coverage@30%": 0.9576857723071821, "calibration/coverage@5%": 0.15706616943089902, "calibration/ece": 0.16479729043171218, "calibration/mean_confidence": 0.5972124416701616, "calibration/prompt_uniqueness": 0.864258559515551, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011718749999999977, "completions/max_length": 3774.8, "completions/max_terminated_length": 3774.8, "completions/mean_length": 884.886376953125, "completions/mean_terminated_length": 895.39912109375, "completions/min_length": 0.0, "completions/min_terminated_length": 292.2, "epoch": 0.47999400007499904, "grad_norm": 0.0003483534383121878, "learning_rate": 2.409638554216868e-07, "loss": -0.0089, "num_tokens": 524962899.0, "reward": 0.9923826336860657, "reward_std": 0.13313800245523452, "rewards/accuracy_reward": 0.69296875, "rewards/brier_reward": 0.7982598781585694, "rewards/confidence_uniqueness_reward": 0.9397276759147644, "rewards/format_reward": 0.9881944417953491, "rewards/frontier_aurc_reward": -0.001622817711904645, "rewards/frontier_coverage_0": 0.009027415048331022, "rewards/frontier_coverage_1": 0.009027415048331022, "rewards/frontier_coverage_10": 0.013452378194779157, "rewards/frontier_coverage_15": 0.041050878912210466, "rewards/frontier_coverage_20": 0.0933085709810257, "rewards/frontier_coverage_25": 0.16813298761844636, "rewards/frontier_coverage_5": 0.009030948393046856, "rewards/frontier_ece_reward": -0.0034804839408025144, "rewards/frontier_entropy_batch_reward": -0.25917285978794097, "signal/accuracy_reward/centered_abs_mean": 0.15110134482383727, "signal/accuracy_reward/group_bin_occupancy": 0.19999999999999998, "signal/accuracy_reward/group_std_mean": 0.20413728952407836, "signal/accuracy_reward/group_zero_std_frac": 0.3999999940395355, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07555067241191864, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07555067241191864, "signal/advantage_abs_mean": 0.0965758740901947, "signal/advantage_pre_scale_abs_mean": 0.0965758740901947, "signal/advantage_pre_scale_std": 0.15648123919963836, "signal/advantage_std": 0.15648123919963836, "signal/brier_reward/centered_abs_mean": 0.13323327153921127, "signal/brier_reward/group_bin_occupancy": 0.836111111111111, "signal/brier_reward/group_std_mean": 0.1752035677433014, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013323327712714671, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013323327712714671, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.030282768607139587, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8295138888888889, "signal/confidence_uniqueness_reward/group_std_mean": 0.053694164752960204, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003028276888653636, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003028276888653636, "signal/format_reward/centered_abs_mean": 0.02018229179084301, "signal/format_reward/group_bin_occupancy": 0.1486111111111111, "signal/format_reward/group_std_mean": 0.04187272489070892, "signal/format_reward/group_zero_std_frac": 0.8111111164093018, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.010091145895421505, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.010091145895421505, "signal/frontier_aurc_reward/centered_abs_mean": 0.0018836703849956394, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6753472222222222, "signal/frontier_aurc_reward/group_std_mean": 0.0034654059447348116, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.3545879957964645e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.3545879957964645e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.17963458895683287, "signal/frontier_coverage_0/group_bin_occupancy": 0.8232638888888888, "signal/frontier_coverage_0/group_std_mean": 0.23895832598209382, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002245432324707508, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002245432324707508, "signal/frontier_coverage_1/centered_abs_mean": 0.17963458895683287, "signal/frontier_coverage_1/group_bin_occupancy": 0.8232638888888888, "signal/frontier_coverage_1/group_std_mean": 0.23895832598209382, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002245432324707508, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002245432324707508, "signal/frontier_coverage_10/centered_abs_mean": 0.15076417326927186, "signal/frontier_coverage_10/group_bin_occupancy": 0.820138888888889, "signal/frontier_coverage_10/group_std_mean": 0.2017397791147232, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018845521612092853, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018845521612092853, "signal/frontier_coverage_15/centered_abs_mean": 0.060654813051223756, "signal/frontier_coverage_15/group_bin_occupancy": 0.91875, "signal/frontier_coverage_15/group_std_mean": 0.07899993509054185, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0007581851677969098, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0007581851677969098, "signal/frontier_coverage_20/centered_abs_mean": 0.07843978106975555, "signal/frontier_coverage_20/group_bin_occupancy": 0.9059027777777778, "signal/frontier_coverage_20/group_std_mean": 0.10048370212316513, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009804973145946861, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009804973145946861, "signal/frontier_coverage_25/centered_abs_mean": 0.12252330780029297, "signal/frontier_coverage_25/group_bin_occupancy": 0.8850694444444442, "signal/frontier_coverage_25/group_std_mean": 0.1579478919506073, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015315414173528552, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015315414173528552, "signal/frontier_coverage_5/centered_abs_mean": 0.1795719563961029, "signal/frontier_coverage_5/group_bin_occupancy": 0.8232638888888888, "signal/frontier_coverage_5/group_std_mean": 0.2388751685619354, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0022446493152529, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022446493152529, "signal/frontier_ece_reward/centered_abs_mean": 0.016645903512835503, "signal/frontier_ece_reward/group_bin_occupancy": 0.5958333333333333, "signal/frontier_ece_reward/group_std_mean": 0.020757382735610008, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0016645904397591949, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0016645904397591949, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31962995529174804, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.763888888888889, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3887501060962677, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.031962993741035464, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.031962993741035464, "step": 200 }, { "epoch": 0.47999400007499904, "eval_calibration/aurc": 0.144239155861395, "eval_calibration/batch_distribution_entropy": 0.9221909700103857, "eval_calibration/batch_entropy_100bins": 0.7062575776226812, "eval_calibration/batch_entropy_10bins": 0.9221909700103857, "eval_calibration/batch_entropy_50bins": 0.7956526029853941, "eval_calibration/batch_uniqueness": 0.8993341113616218, "eval_calibration/buffer_distribution_entropy": 0.9820939519133489, "eval_calibration/buffer_entropy_100bins": 0.990473516652059, "eval_calibration/buffer_entropy_10bins": 0.9820939519133489, "eval_calibration/buffer_entropy_50bins": 0.9889406860438233, "eval_calibration/confidence_entropy": 0.5107617812049482, "eval_calibration/coverage@0%": 0.30532034050179213, "eval_calibration/coverage@1%": 0.30532034050179213, "eval_calibration/coverage@10%": 0.5592517921146953, "eval_calibration/coverage@15%": 0.6490031362007169, "eval_calibration/coverage@20%": 0.7074932795698925, "eval_calibration/coverage@25%": 0.9164986559139785, "eval_calibration/coverage@30%": 0.9375, "eval_calibration/coverage@5%": 0.33309811827956987, "eval_calibration/ece": 0.25243557157011315, "eval_calibration/mean_confidence": 0.544805507687573, "eval_calibration/prompt_uniqueness": 0.8993341113616218, "eval_completions/clipped_ratio": 0.01128472222222221, "eval_completions/max_length": 2559.5, "eval_completions/max_terminated_length": 2559.5, "eval_completions/mean_length": 878.668935139974, "eval_completions/mean_terminated_length": 888.665771484375, "eval_completions/min_length": 66.83333333333333, "eval_completions/min_terminated_length": 338.1666666666667, "eval_loss": 0.0, "eval_num_tokens": 524962899.0, "eval_reward": 0.9085456728935242, "eval_reward_std": 0.236699769894282, "eval_rewards/accuracy_reward": 0.6822916666666666, "eval_rewards/brier_reward": 0.7939638197422028, "eval_rewards/confidence_uniqueness_reward": 0.8837526738643646, "eval_rewards/format_reward": 0.9887152711550394, "eval_rewards/frontier_aurc_reward": -0.0017724030088478078, "eval_rewards/frontier_coverage_0": 0.016208822838962078, "eval_rewards/frontier_coverage_1": 0.016208822838962078, "eval_rewards/frontier_coverage_10": 0.01999556540007082, "eval_rewards/frontier_coverage_15": 0.04217005521059036, "eval_rewards/frontier_coverage_20": 0.08990584810574849, "eval_rewards/frontier_coverage_25": 0.15949934472640356, "eval_rewards/frontier_coverage_5": 0.016208509060864646, "eval_rewards/frontier_ece_reward": -0.0033823695460644863, "eval_rewards/frontier_entropy_batch_reward": -0.9887152711550394, "eval_runtime": 192.0973, "eval_samples_per_second": 5.206, "eval_signal/accuracy_reward/centered_abs_mean": 0.4200303753217061, "eval_signal/accuracy_reward/group_bin_occupancy": 0.25, "eval_signal/accuracy_reward/group_std_mean": 0.46436455845832825, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.21001518766085306, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.21001518766085306, "eval_signal/advantage_abs_mean": 0.2051889697710673, "eval_signal/advantage_pre_scale_abs_mean": 0.2051889697710673, "eval_signal/advantage_pre_scale_std": 0.2356510510047277, "eval_signal/advantage_std": 0.2356510510047277, "eval_signal/brier_reward/centered_abs_mean": 0.18867906431357065, "eval_signal/brier_reward/group_bin_occupancy": 0.8437500000000001, "eval_signal/brier_reward/group_std_mean": 0.24486981829007468, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018867906493445236, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.018867906493445236, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.053733758007486664, "eval_signal/confidence_uniqueness_reward/group_bin_occupancy": 0.39236111111111116, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.08356440626084805, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005373376111189525, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005373376111189525, "eval_signal/format_reward/centered_abs_mean": 0.021538628575702507, "eval_signal/format_reward/group_bin_occupancy": 0.1597222222222222, "eval_signal/format_reward/group_std_mean": 0.05486780156691869, "eval_signal/format_reward/group_zero_std_frac": 0.722222238779068, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.010769314287851254, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.010769314287851254, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0029052970154831805, "eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.5902777777777777, "eval_signal/frontier_aurc_reward/group_std_mean": 0.006144408291826646, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.63162131786036e-05, "eval_signal/frontier_aurc_reward/weight": 0.012500000186264515, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.63162131786036e-05, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.2968921313683192, "eval_signal/frontier_coverage_0/group_bin_occupancy": 0.9166666666666666, "eval_signal/frontier_coverage_0/group_std_mean": 0.40362854798634845, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003711151541210711, "eval_signal/frontier_coverage_0/weight": 0.012500000186264515, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003711151541210711, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.2968921313683192, "eval_signal/frontier_coverage_1/group_bin_occupancy": 0.9166666666666666, "eval_signal/frontier_coverage_1/group_std_mean": 0.40362854798634845, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003711151541210711, "eval_signal/frontier_coverage_1/weight": 0.012500000186264515, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003711151541210711, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.23208573708931604, "eval_signal/frontier_coverage_10/group_bin_occupancy": 0.9236111111111112, "eval_signal/frontier_coverage_10/group_std_mean": 0.32233305275440216, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0029010717601825795, "eval_signal/frontier_coverage_10/weight": 0.012500000186264515, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0029010717601825795, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.07928842430313428, "eval_signal/frontier_coverage_15/group_bin_occupancy": 0.9236111111111112, "eval_signal/frontier_coverage_15/group_std_mean": 0.10499530161420505, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0009911053445345412, "eval_signal/frontier_coverage_15/weight": 0.012500000186264515, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009911053445345412, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.11974722519516945, "eval_signal/frontier_coverage_20/group_bin_occupancy": 0.9236111111111112, "eval_signal/frontier_coverage_20/group_std_mean": 0.15377535670995712, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014968403847888112, "eval_signal/frontier_coverage_20/weight": 0.012500000186264515, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014968403847888112, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.21281012147665024, "eval_signal/frontier_coverage_25/group_bin_occupancy": 0.9201388888888888, "eval_signal/frontier_coverage_25/group_std_mean": 0.2633419682582219, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002660126502936085, "eval_signal/frontier_coverage_25/weight": 0.012500000186264515, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002660126502936085, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.29676895836989087, "eval_signal/frontier_coverage_5/group_bin_occupancy": 0.9166666666666666, "eval_signal/frontier_coverage_5/group_std_mean": 0.40348292887210846, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0037096117545540133, "eval_signal/frontier_coverage_5/weight": 0.012500000186264515, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0037096117545540133, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.026015580942233402, "eval_signal/frontier_ece_reward/group_bin_occupancy": 0.763888888888889, "eval_signal/frontier_ece_reward/group_std_mean": 0.03171707410365343, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0026015581485504904, "eval_signal/frontier_ece_reward/weight": 0.10000000149011612, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0026015581485504904, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.021538628575702507, "eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.1597222222222222, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.05486780156691869, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.722222238779068, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.002153862966224551, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.002153862966224551, "eval_steps_per_second": 0.031, "step": 200 }, { "calibration/aurc": 0.15963940113709016, "calibration/batch_distribution_entropy": 0.9623404892352694, "calibration/batch_entropy_100bins": 0.9498760133778437, "calibration/batch_entropy_10bins": 0.9623404892352694, "calibration/batch_entropy_50bins": 0.9618336723769684, "calibration/batch_uniqueness": 0.9492106549711471, "calibration/buffer_distribution_entropy": 0.9825242016449753, "calibration/buffer_entropy_100bins": 0.9906980654782693, "calibration/buffer_entropy_10bins": 0.9825242016449753, "calibration/buffer_entropy_50bins": 0.9891967249919829, "calibration/confidence_entropy": 0.49405691099373455, "calibration/coverage@0%": 0.024152440271538782, "calibration/coverage@1%": 0.024152440271538782, "calibration/coverage@10%": 0.2680771936187322, "calibration/coverage@15%": 0.5601533512380341, "calibration/coverage@20%": 0.8208879719119526, "calibration/coverage@25%": 0.9063656280346862, "calibration/coverage@30%": 0.9609612165574376, "calibration/coverage@5%": 0.18524929721217218, "calibration/ece": 0.1749253833432953, "calibration/mean_confidence": 0.574653688117391, "calibration/prompt_uniqueness": 0.856641489325094, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012413194444444442, "completions/max_length": 3715.0, "completions/max_terminated_length": 3715.0, "completions/mean_length": 898.568408203125, "completions/mean_terminated_length": 909.8266723632812, "completions/min_length": 0.0, "completions/min_terminated_length": 287.6, "epoch": 0.491993850076874, "grad_norm": 0.000315765239065513, "learning_rate": 9.036144578313253e-08, "loss": -0.0096, "num_tokens": 538380359.0, "reward": 1.0108286142349243, "reward_std": 0.13068382143974305, "rewards/accuracy_reward": 0.735850703716278, "rewards/brier_reward": 0.7879548072814941, "rewards/confidence_uniqueness_reward": 0.9393365502357482, "rewards/format_reward": 0.987413203716278, "rewards/frontier_aurc_reward": -0.0011491154204122723, "rewards/frontier_coverage_0": -0.02586173443123698, "rewards/frontier_coverage_1": -0.02586173443123698, "rewards/frontier_coverage_10": -0.005061741080135107, "rewards/frontier_coverage_15": 0.04176960214972496, "rewards/frontier_coverage_20": 0.1021927997469902, "rewards/frontier_coverage_25": 0.1827650785446167, "rewards/frontier_coverage_5": -0.025736397597938776, "rewards/frontier_ece_reward": -0.007271666172891855, "rewards/frontier_entropy_batch_reward": -0.2584350109100342, "signal/accuracy_reward/centered_abs_mean": 0.158447265625, "signal/accuracy_reward/group_bin_occupancy": 0.203125, "signal/accuracy_reward/group_std_mean": 0.21330890357494353, "signal/accuracy_reward/group_zero_std_frac": 0.37500000596046446, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0792236328125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0792236328125, "signal/advantage_abs_mean": 0.09550892561674118, "signal/advantage_pre_scale_abs_mean": 0.09550892561674118, "signal/advantage_pre_scale_std": 0.15468985438346863, "signal/advantage_std": 0.15468985438346863, "signal/brier_reward/centered_abs_mean": 0.13971660435199737, "signal/brier_reward/group_bin_occupancy": 0.8302083333333332, "signal/brier_reward/group_std_mean": 0.17900753021240234, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0139716612175107, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.0139716612175107, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.031369969993829724, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.836111111111111, "signal/confidence_uniqueness_reward/group_std_mean": 0.051959720253944394, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0031369972042739392, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0031369972042739392, "signal/format_reward/centered_abs_mean": 0.021185980923473836, "signal/format_reward/group_bin_occupancy": 0.14513888888888887, "signal/format_reward/group_std_mean": 0.03983290120959282, "signal/format_reward/group_zero_std_frac": 0.8388888955116272, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.010592990461736918, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.010592990461736918, "signal/frontier_aurc_reward/centered_abs_mean": 0.0012949521420523523, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6989583333333333, "signal/frontier_aurc_reward/group_std_mean": 0.002273207646794617, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.6186902576009744e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.6186902576009744e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.20164394080638887, "signal/frontier_coverage_0/group_bin_occupancy": 0.8111111111111112, "signal/frontier_coverage_0/group_std_mean": 0.26378363370895386, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0025205494835972785, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0025205494835972785, "signal/frontier_coverage_1/centered_abs_mean": 0.20164394080638887, "signal/frontier_coverage_1/group_bin_occupancy": 0.8111111111111112, "signal/frontier_coverage_1/group_std_mean": 0.26378363370895386, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0025205494835972785, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025205494835972785, "signal/frontier_coverage_10/centered_abs_mean": 0.1517424076795578, "signal/frontier_coverage_10/group_bin_occupancy": 0.80625, "signal/frontier_coverage_10/group_std_mean": 0.19990101754665374, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018967799842357635, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018967799842357635, "signal/frontier_coverage_15/centered_abs_mean": 0.06446310877799988, "signal/frontier_coverage_15/group_bin_occupancy": 0.9152777777777776, "signal/frontier_coverage_15/group_std_mean": 0.08208967447280884, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0008057888597249984, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0008057888597249984, "signal/frontier_coverage_20/centered_abs_mean": 0.07898852378129959, "signal/frontier_coverage_20/group_bin_occupancy": 0.9135416666666666, "signal/frontier_coverage_20/group_std_mean": 0.10002039521932601, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009873565868474543, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009873565868474543, "signal/frontier_coverage_25/centered_abs_mean": 0.11750572323799133, "signal/frontier_coverage_25/group_bin_occupancy": 0.8972222222222221, "signal/frontier_coverage_25/group_std_mean": 0.14964151680469512, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014688215916976333, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014688215916976333, "signal/frontier_coverage_5/centered_abs_mean": 0.20136131048202516, "signal/frontier_coverage_5/group_bin_occupancy": 0.8111111111111112, "signal/frontier_coverage_5/group_std_mean": 0.2634296715259552, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002517016418278217, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002517016418278217, "signal/frontier_ece_reward/centered_abs_mean": 0.018270738050341608, "signal/frontier_ece_reward/group_bin_occupancy": 0.5680555555555555, "signal/frontier_ece_reward/group_std_mean": 0.022496170178055764, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0018270738422870637, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0018270738422870637, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32475059032440184, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7611111111111112, "signal/frontier_entropy_batch_reward/group_std_mean": 0.39458569288253786, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03247505947947502, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03247505947947502, "step": 205 }, { "calibration/aurc": 0.10026530827954756, "calibration/batch_distribution_entropy": 0.9666801209614921, "calibration/batch_entropy_100bins": 0.9541731924906994, "calibration/batch_entropy_10bins": 0.9666801209614921, "calibration/batch_entropy_50bins": 0.9658220780724568, "calibration/batch_uniqueness": 0.9500334187474572, "calibration/buffer_distribution_entropy": 0.9828309697501236, "calibration/buffer_entropy_100bins": 0.9908304445646857, "calibration/buffer_entropy_10bins": 0.9828309697501236, "calibration/buffer_entropy_50bins": 0.9893492960251346, "calibration/confidence_entropy": 0.5110887140777053, "calibration/coverage@0%": 0.06926601280103145, "calibration/coverage@1%": 0.06926601280103145, "calibration/coverage@10%": 0.5960768061887002, "calibration/coverage@15%": 0.7677694893401483, "calibration/coverage@20%": 0.8755652253994567, "calibration/coverage@25%": 0.9500483492195055, "calibration/coverage@30%": 1.0, "calibration/coverage@5%": 0.3437007874015749, "calibration/ece": 0.15719824569018936, "calibration/mean_confidence": 0.5882376328465995, "calibration/prompt_uniqueness": 0.8630131514079985, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 3914.6666666666665, "completions/max_terminated_length": 3914.6666666666665, "completions/mean_length": 903.0143229166666, "completions/mean_terminated_length": 913.7917277018229, "completions/min_length": 0.0, "completions/min_terminated_length": 272.6666666666667, "epoch": 0.49919376007799904, "num_tokens": 546488042.0, "reward": 0.9862767457962036, "reward_std": 0.13542574644088745, "rewards/accuracy_reward": 0.6841724514961243, "rewards/brier_reward": 0.7893265883127848, "rewards/confidence_uniqueness_reward": 0.9390823642412821, "rewards/format_reward": 0.9881365696589152, "rewards/frontier_aurc_reward": -0.0011579099421699841, "rewards/frontier_coverage_0": 0.0056370516152431565, "rewards/frontier_coverage_1": 0.0056370516152431565, "rewards/frontier_coverage_10": 0.01475714573947092, "rewards/frontier_coverage_15": 0.043342759211858116, "rewards/frontier_coverage_20": 0.09414516886075337, "rewards/frontier_coverage_25": 0.16349380711714426, "rewards/frontier_coverage_5": 0.00573221566931655, "rewards/frontier_ece_reward": -0.0046526785008609295, "rewards/frontier_entropy_batch_reward": -0.2639826734860738, "signal/accuracy_reward/centered_abs_mean": 0.156602643430233, "signal/accuracy_reward/group_bin_occupancy": 0.19849537037037038, "signal/accuracy_reward/group_std_mean": 0.20787501335144043, "signal/accuracy_reward/group_zero_std_frac": 0.41203704476356506, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0783013217151165, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0783013217151165, "signal/advantage_abs_mean": 0.0979112833738327, "signal/advantage_pre_scale_abs_mean": 0.0979112833738327, "signal/advantage_pre_scale_std": 0.1580300529797872, "signal/advantage_std": 0.1580300529797872, "signal/brier_reward/centered_abs_mean": 0.14230993390083313, "signal/brier_reward/group_bin_occupancy": 0.8258101851851851, "signal/brier_reward/group_std_mean": 0.18486674626668295, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014230993886788687, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014230993886788687, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03189393070836862, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.7905092592592592, "signal/confidence_uniqueness_reward/group_std_mean": 0.05960730090737343, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003189393396799763, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003189393396799763, "signal/format_reward/centered_abs_mean": 0.021647136037548382, "signal/format_reward/group_bin_occupancy": 0.1527777777777778, "signal/format_reward/group_std_mean": 0.04763485739628474, "signal/format_reward/group_zero_std_frac": 0.7777777910232544, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.010823568018774191, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.010823568018774191, "signal/frontier_aurc_reward/centered_abs_mean": 0.0013959337957203388, "signal/frontier_aurc_reward/group_bin_occupancy": 0.7077546296296297, "signal/frontier_aurc_reward/group_std_mean": 0.0026435600593686104, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.7449173355998937e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.7449173355998937e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.20097551246484122, "signal/frontier_coverage_0/group_bin_occupancy": 0.8368055555555557, "signal/frontier_coverage_0/group_std_mean": 0.26183244585990906, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002512193905810515, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002512193905810515, "signal/frontier_coverage_1/centered_abs_mean": 0.20097551246484122, "signal/frontier_coverage_1/group_bin_occupancy": 0.8368055555555557, "signal/frontier_coverage_1/group_std_mean": 0.26183244585990906, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002512193905810515, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002512193905810515, "signal/frontier_coverage_10/centered_abs_mean": 0.14055238167444864, "signal/frontier_coverage_10/group_bin_occupancy": 0.8327546296296297, "signal/frontier_coverage_10/group_std_mean": 0.18522140880425772, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0017569047631695867, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017569047631695867, "signal/frontier_coverage_15/centered_abs_mean": 0.06127483397722244, "signal/frontier_coverage_15/group_bin_occupancy": 0.9293981481481483, "signal/frontier_coverage_15/group_std_mean": 0.07892606407403946, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0007659354790424308, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0007659354790424308, "signal/frontier_coverage_20/centered_abs_mean": 0.0779200370113055, "signal/frontier_coverage_20/group_bin_occupancy": 0.8993055555555557, "signal/frontier_coverage_20/group_std_mean": 0.10050106793642044, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009740005092074474, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009740005092074474, "signal/frontier_coverage_25/centered_abs_mean": 0.11800318956375122, "signal/frontier_coverage_25/group_bin_occupancy": 0.8865740740740741, "signal/frontier_coverage_25/group_std_mean": 0.15238422652085623, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001475039830741783, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001475039830741783, "signal/frontier_coverage_5/centered_abs_mean": 0.20059374471505484, "signal/frontier_coverage_5/group_bin_occupancy": 0.8368055555555557, "signal/frontier_coverage_5/group_std_mean": 0.26134761174519855, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025074218089381852, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025074218089381852, "signal/frontier_ece_reward/centered_abs_mean": 0.01781535955766837, "signal/frontier_ece_reward/group_bin_occupancy": 0.5752314814814815, "signal/frontier_ece_reward/group_std_mean": 0.02214539299408595, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0017815359557668369, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0017815359557668369, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3191050589084625, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7662037037037037, "signal/frontier_entropy_batch_reward/group_std_mean": 0.38939042886098224, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03191050638755163, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03191050638755163, "step": 208, "total_flos": 0.0, "train_loss": -0.012413898850074755, "train_runtime": 46320.0455, "train_samples_per_second": 0.324, "train_steps_per_second": 0.004 } ], "logging_steps": 5, "max_steps": 208, "num_input_tokens_seen": 546488042, "num_train_epochs": 1, "save_steps": 60, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 6, "trial_name": null, "trial_params": null }