{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9984, "eval_steps": 50, "global_step": 312, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "calibration/aurc": 0.6355440224731985, "calibration/batch_distribution_entropy": 0.6404299093794114, "calibration/batch_entropy_100bins": 0.477031643413819, "calibration/batch_entropy_10bins": 0.6404299093794114, "calibration/batch_entropy_50bins": 0.5591541369767675, "calibration/batch_uniqueness": 0.7208924159407343, "calibration/confidence_entropy": 0.34918231888339113, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.49676340149214465, "calibration/mean_confidence": 0.7938105030524442, "calibration/prompt_uniqueness": 0.5982426959778107, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0369140625, "completions/max_length": 1536.0, "completions/max_terminated_length": 1485.2, "completions/mean_length": 270.7107421875, "completions/mean_terminated_length": 222.20957946777344, "completions/min_length": 1.8, "completions/min_terminated_length": 1.8, "epoch": 0.016, "grad_norm": 0.04414910078048706, "learning_rate": 3.1249999999999997e-07, "loss": 0.074, "num_tokens": 17616110.0, "reward": 0.5323251605033874, "reward_std": 0.41435371041297914, "rewards/accuracy_reward": 0.2212890625, "rewards/brier_reward": 0.37365264296531675, "rewards/confidence_uniqueness_reward": 0.4837990701198578, "rewards/format_reward": 0.67568359375, "rewards/frontier_aurc_reward": 0.30136591792106626, "rewards/frontier_coverage_0": 0.30136591792106626, "rewards/frontier_coverage_1": 0.30136591792106626, "rewards/frontier_coverage_10": 0.30136591792106626, "rewards/frontier_coverage_15": 0.30136591792106626, "rewards/frontier_coverage_20": 0.30136591792106626, "rewards/frontier_coverage_25": 0.30136591792106626, "rewards/frontier_coverage_5": 0.30136591792106626, "rewards/frontier_ece_reward": 0.30136591792106626, "rewards/frontier_entropy_batch_reward": -0.6217953085899353, "signal/accuracy_reward/centered_abs_mean": 0.2416259765625, "signal/accuracy_reward/group_bin_occupancy": 0.2109375, "signal/accuracy_reward/group_std_mean": 0.2842506766319275, "signal/accuracy_reward/group_zero_std_frac": 0.3125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.12081298828125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.12081298828125, "signal/advantage_abs_mean": 0.3522315204143524, "signal/advantage_pre_scale_abs_mean": 0.3522315204143524, "signal/advantage_pre_scale_std": 0.4241958498954773, "signal/advantage_std": 0.4241958498954773, "signal/brier_reward/centered_abs_mean": 0.3212295413017273, "signal/brier_reward/group_bin_occupancy": 0.748828125, "signal/brier_reward/group_std_mean": 0.36608951091766356, "signal/brier_reward/group_zero_std_frac": 0.003125, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03212295435369015, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.03212295435369015, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.3023835599422455, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.594140625, "signal/confidence_uniqueness_reward/group_std_mean": 0.3513213813304901, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.030238356068730356, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.030238356068730356, "signal/format_reward/centered_abs_mean": 0.408074951171875, "signal/format_reward/group_bin_occupancy": 0.25, "signal/format_reward/group_std_mean": 0.45624412298202516, "signal/format_reward/group_zero_std_frac": 0.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.2040374755859375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.2040374755859375, "signal/frontier_aurc_reward/centered_abs_mean": 0.293000316619873, "signal/frontier_aurc_reward/group_bin_occupancy": 0.65546875, "signal/frontier_aurc_reward/group_std_mean": 0.3444704055786133, "signal/frontier_aurc_reward/group_zero_std_frac": 0.003125, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.003662504069507122, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.003662504069507122, "signal/frontier_coverage_0/centered_abs_mean": 0.293000316619873, "signal/frontier_coverage_0/group_bin_occupancy": 0.65546875, "signal/frontier_coverage_0/group_std_mean": 0.3444704055786133, "signal/frontier_coverage_0/group_zero_std_frac": 0.003125, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003662504069507122, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003662504069507122, "signal/frontier_coverage_1/centered_abs_mean": 0.293000316619873, "signal/frontier_coverage_1/group_bin_occupancy": 0.65546875, "signal/frontier_coverage_1/group_std_mean": 0.3444704055786133, "signal/frontier_coverage_1/group_zero_std_frac": 0.003125, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003662504069507122, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003662504069507122, "signal/frontier_coverage_10/centered_abs_mean": 0.293000316619873, "signal/frontier_coverage_10/group_bin_occupancy": 0.65546875, "signal/frontier_coverage_10/group_std_mean": 0.3444704055786133, "signal/frontier_coverage_10/group_zero_std_frac": 0.003125, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003662504069507122, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003662504069507122, "signal/frontier_coverage_15/centered_abs_mean": 0.293000316619873, "signal/frontier_coverage_15/group_bin_occupancy": 0.65546875, "signal/frontier_coverage_15/group_std_mean": 0.3444704055786133, "signal/frontier_coverage_15/group_zero_std_frac": 0.003125, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003662504069507122, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003662504069507122, "signal/frontier_coverage_20/centered_abs_mean": 0.293000316619873, "signal/frontier_coverage_20/group_bin_occupancy": 0.65546875, "signal/frontier_coverage_20/group_std_mean": 0.3444704055786133, "signal/frontier_coverage_20/group_zero_std_frac": 0.003125, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003662504069507122, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003662504069507122, "signal/frontier_coverage_25/centered_abs_mean": 0.293000316619873, "signal/frontier_coverage_25/group_bin_occupancy": 0.65546875, "signal/frontier_coverage_25/group_std_mean": 0.3444704055786133, "signal/frontier_coverage_25/group_zero_std_frac": 0.003125, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003662504069507122, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003662504069507122, "signal/frontier_coverage_5/centered_abs_mean": 0.293000316619873, "signal/frontier_coverage_5/group_bin_occupancy": 0.65546875, "signal/frontier_coverage_5/group_std_mean": 0.3444704055786133, "signal/frontier_coverage_5/group_zero_std_frac": 0.003125, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003662504069507122, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003662504069507122, "signal/frontier_ece_reward/centered_abs_mean": 0.293000316619873, "signal/frontier_ece_reward/group_bin_occupancy": 0.65546875, "signal/frontier_ece_reward/group_std_mean": 0.3444704055786133, "signal/frontier_ece_reward/group_zero_std_frac": 0.003125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.029300032556056975, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.029300032556056975, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.43757479190826415, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.380859375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4825276255607605, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.043757478892803195, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.043757478892803195, "step": 5 }, { "calibration/aurc": 0.6721645281105297, "calibration/batch_distribution_entropy": 0.6581675725952678, "calibration/batch_entropy_100bins": 0.4893215163923771, "calibration/batch_entropy_10bins": 0.6581675725952678, "calibration/batch_entropy_50bins": 0.5712117371457062, "calibration/batch_uniqueness": 0.7298019114502236, "calibration/confidence_entropy": 0.34160307185532907, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.5249416015362247, "calibration/mean_confidence": 0.7863750362684165, "calibration/prompt_uniqueness": 0.6165276543074268, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0361328125, "completions/max_length": 1536.0, "completions/max_terminated_length": 1505.8, "completions/mean_length": 264.63974609375, "completions/mean_terminated_length": 216.997314453125, "completions/min_length": 2.0, "completions/min_terminated_length": 2.0, "epoch": 0.032, "grad_norm": 0.02532093971967697, "learning_rate": 6.249999999999999e-07, "loss": 0.0755, "num_tokens": 35426373.0, "reward": 0.5459524154663086, "reward_std": 0.39404299259185793, "rewards/accuracy_reward": 0.20927734375, "rewards/brier_reward": 0.3790619194507599, "rewards/confidence_uniqueness_reward": 0.5123092293739319, "rewards/format_reward": 0.71435546875, "rewards/frontier_aurc_reward": 0.29989256858825686, "rewards/frontier_coverage_0": 0.29989256858825686, "rewards/frontier_coverage_1": 0.29989256858825686, "rewards/frontier_coverage_10": 0.29989256858825686, "rewards/frontier_coverage_15": 0.29989256858825686, "rewards/frontier_coverage_20": 0.29989256858825686, "rewards/frontier_coverage_25": 0.29989256858825686, "rewards/frontier_coverage_5": 0.29989256858825686, "rewards/frontier_ece_reward": 0.29989256858825686, "rewards/frontier_entropy_batch_reward": -0.649796187877655, "signal/accuracy_reward/centered_abs_mean": 0.225921630859375, "signal/accuracy_reward/group_bin_occupancy": 0.21015625, "signal/accuracy_reward/group_std_mean": 0.27287338972091674, "signal/accuracy_reward/group_zero_std_frac": 0.31875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1129608154296875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.1129608154296875, "signal/advantage_abs_mean": 0.3253436267375946, "signal/advantage_pre_scale_abs_mean": 0.3253436267375946, "signal/advantage_pre_scale_std": 0.403624951839447, "signal/advantage_std": 0.403624951839447, "signal/brier_reward/centered_abs_mean": 0.3087932109832764, "signal/brier_reward/group_bin_occupancy": 0.76484375, "signal/brier_reward/group_std_mean": 0.35776872634887696, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.030879321694374084, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.030879321694374084, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.28355550169944765, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.593359375, "signal/confidence_uniqueness_reward/group_std_mean": 0.3405479848384857, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.028355551511049272, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.028355551511049272, "signal/format_reward/centered_abs_mean": 0.376983642578125, "signal/format_reward/group_bin_occupancy": 0.25, "signal/format_reward/group_std_mean": 0.43766148686408995, "signal/format_reward/group_zero_std_frac": 0.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.1884918212890625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.1884918212890625, "signal/frontier_aurc_reward/centered_abs_mean": 0.28301833271980287, "signal/frontier_aurc_reward/group_bin_occupancy": 0.67578125, "signal/frontier_aurc_reward/group_std_mean": 0.33786413073539734, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.003537729289382696, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.003537729289382696, "signal/frontier_coverage_0/centered_abs_mean": 0.28301833271980287, "signal/frontier_coverage_0/group_bin_occupancy": 0.67578125, "signal/frontier_coverage_0/group_std_mean": 0.33786413073539734, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003537729289382696, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003537729289382696, "signal/frontier_coverage_1/centered_abs_mean": 0.28301833271980287, "signal/frontier_coverage_1/group_bin_occupancy": 0.67578125, "signal/frontier_coverage_1/group_std_mean": 0.33786413073539734, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003537729289382696, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003537729289382696, "signal/frontier_coverage_10/centered_abs_mean": 0.28301833271980287, "signal/frontier_coverage_10/group_bin_occupancy": 0.67578125, "signal/frontier_coverage_10/group_std_mean": 0.33786413073539734, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003537729289382696, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003537729289382696, "signal/frontier_coverage_15/centered_abs_mean": 0.28301833271980287, "signal/frontier_coverage_15/group_bin_occupancy": 0.67578125, "signal/frontier_coverage_15/group_std_mean": 0.33786413073539734, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003537729289382696, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003537729289382696, "signal/frontier_coverage_20/centered_abs_mean": 0.28301833271980287, "signal/frontier_coverage_20/group_bin_occupancy": 0.67578125, "signal/frontier_coverage_20/group_std_mean": 0.33786413073539734, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003537729289382696, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003537729289382696, "signal/frontier_coverage_25/centered_abs_mean": 0.28301833271980287, "signal/frontier_coverage_25/group_bin_occupancy": 0.67578125, "signal/frontier_coverage_25/group_std_mean": 0.33786413073539734, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003537729289382696, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003537729289382696, "signal/frontier_coverage_5/centered_abs_mean": 0.28301833271980287, "signal/frontier_coverage_5/group_bin_occupancy": 0.67578125, "signal/frontier_coverage_5/group_std_mean": 0.33786413073539734, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003537729289382696, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003537729289382696, "signal/frontier_ece_reward/centered_abs_mean": 0.28301833271980287, "signal/frontier_ece_reward/group_bin_occupancy": 0.67578125, "signal/frontier_ece_reward/group_std_mean": 0.33786413073539734, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.02830183431506157, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.02830183431506157, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.42037245631217957, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.391796875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4748634576797485, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.042037245631217954, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.042037245631217954, "step": 10 }, { "calibration/aurc": 0.5960428535605962, "calibration/batch_distribution_entropy": 0.6396332627525727, "calibration/batch_entropy_100bins": 0.48268498625474204, "calibration/batch_entropy_10bins": 0.6396332627525727, "calibration/batch_entropy_50bins": 0.5648247374535533, "calibration/batch_uniqueness": 0.7106374576366465, "calibration/buffer_distribution_entropy": 0.6633551500439849, "calibration/buffer_entropy_100bins": 0.4946308797642045, "calibration/buffer_entropy_10bins": 0.6633551500439849, "calibration/buffer_entropy_50bins": 0.5778719937765917, "calibration/confidence_entropy": 0.34154283209851816, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.48102077114836533, "calibration/mean_confidence": 0.8027908687275225, "calibration/prompt_uniqueness": 0.6133351997970939, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189453125, "completions/max_length": 1536.0, "completions/max_terminated_length": 1433.8, "completions/mean_length": 204.99287109375, "completions/mean_terminated_length": 179.41653137207032, "completions/min_length": 2.6, "completions/min_terminated_length": 2.6, "epoch": 0.048, "grad_norm": 0.016035104170441628, "learning_rate": 9.374999999999999e-07, "loss": 0.0584, "num_tokens": 52574236.0, "reward": 0.6605345129966735, "reward_std": 0.3093711197376251, "rewards/accuracy_reward": 0.266015625, "rewards/brier_reward": 0.4797984719276428, "rewards/confidence_uniqueness_reward": 0.6384814620018006, "rewards/format_reward": 0.87236328125, "rewards/frontier_aurc_reward": 0.29653857182711363, "rewards/frontier_coverage_0": 0.3118060424923897, "rewards/frontier_coverage_1": 0.3118060424923897, "rewards/frontier_coverage_10": 0.3118060424923897, "rewards/frontier_coverage_15": 0.3118060424923897, "rewards/frontier_coverage_20": 0.3118060424923897, "rewards/frontier_coverage_25": 0.3118060424923897, "rewards/frontier_coverage_5": 0.3118060424923897, "rewards/frontier_ece_reward": 0.2838048979640007, "rewards/frontier_entropy_batch_reward": -0.798531997203827, "signal/accuracy_reward/centered_abs_mean": 0.197265625, "signal/accuracy_reward/group_bin_occupancy": 0.203125, "signal/accuracy_reward/group_std_mean": 0.24309078156948088, "signal/accuracy_reward/group_zero_std_frac": 0.375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0986328125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0986328125, "signal/advantage_abs_mean": 0.2367635428905487, "signal/advantage_pre_scale_abs_mean": 0.2367635428905487, "signal/advantage_pre_scale_std": 0.32340609431266787, "signal/advantage_std": 0.32340609431266787, "signal/brier_reward/centered_abs_mean": 0.27546623945236204, "signal/brier_reward/group_bin_occupancy": 0.8015625, "signal/brier_reward/group_std_mean": 0.328661048412323, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.027546624094247817, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.027546624094247817, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.20335004329681397, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.601953125, "signal/confidence_uniqueness_reward/group_std_mean": 0.2676876664161682, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.020335004664957523, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.020335004664957523, "signal/format_reward/centered_abs_mean": 0.205865478515625, "signal/format_reward/group_bin_occupancy": 0.240625, "signal/format_reward/group_std_mean": 0.30276075601577757, "signal/format_reward/group_zero_std_frac": 0.075, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.1029327392578125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.1029327392578125, "signal/frontier_aurc_reward/centered_abs_mean": 0.2184015023522079, "signal/frontier_aurc_reward/group_bin_occupancy": 0.715234375, "signal/frontier_aurc_reward/group_std_mean": 0.2625438742339611, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.002730018919100985, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.002730018919100985, "signal/frontier_coverage_0/centered_abs_mean": 0.2380078285932541, "signal/frontier_coverage_0/group_bin_occupancy": 0.6953125, "signal/frontier_coverage_0/group_std_mean": 0.2922509163618088, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0029750979971140622, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0029750979971140622, "signal/frontier_coverage_1/centered_abs_mean": 0.2380078285932541, "signal/frontier_coverage_1/group_bin_occupancy": 0.6953125, "signal/frontier_coverage_1/group_std_mean": 0.2922509163618088, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0029750979971140622, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0029750979971140622, "signal/frontier_coverage_10/centered_abs_mean": 0.2380078285932541, "signal/frontier_coverage_10/group_bin_occupancy": 0.6953125, "signal/frontier_coverage_10/group_std_mean": 0.2922509163618088, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0029750979971140622, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0029750979971140622, "signal/frontier_coverage_15/centered_abs_mean": 0.2380078285932541, "signal/frontier_coverage_15/group_bin_occupancy": 0.6953125, "signal/frontier_coverage_15/group_std_mean": 0.2922509163618088, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0029750979971140622, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0029750979971140622, "signal/frontier_coverage_20/centered_abs_mean": 0.2380078285932541, "signal/frontier_coverage_20/group_bin_occupancy": 0.6953125, "signal/frontier_coverage_20/group_std_mean": 0.2922509163618088, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0029750979971140622, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0029750979971140622, "signal/frontier_coverage_25/centered_abs_mean": 0.2380078285932541, "signal/frontier_coverage_25/group_bin_occupancy": 0.6953125, "signal/frontier_coverage_25/group_std_mean": 0.2922509163618088, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0029750979971140622, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0029750979971140622, "signal/frontier_coverage_5/centered_abs_mean": 0.2380078285932541, "signal/frontier_coverage_5/group_bin_occupancy": 0.6953125, "signal/frontier_coverage_5/group_std_mean": 0.2922509163618088, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0029750979971140622, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0029750979971140622, "signal/frontier_ece_reward/centered_abs_mean": 0.24219779670238495, "signal/frontier_ece_reward/group_bin_occupancy": 0.692578125, "signal/frontier_ece_reward/group_std_mean": 0.29229960441589353, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.024219780787825586, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.024219780787825586, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2959599316120148, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.39140625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.39782981276512147, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.01875, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02959599420428276, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02959599420428276, "step": 15 }, { "calibration/aurc": 0.5208466666721476, "calibration/batch_distribution_entropy": 0.6984473949280432, "calibration/batch_entropy_100bins": 0.5226043753565173, "calibration/batch_entropy_10bins": 0.6984473949280432, "calibration/batch_entropy_50bins": 0.6099272547110044, "calibration/batch_uniqueness": 0.7616123918390276, "calibration/buffer_distribution_entropy": 0.6589676652976166, "calibration/buffer_entropy_100bins": 0.49633332911980316, "calibration/buffer_entropy_10bins": 0.6589676652976166, "calibration/buffer_entropy_50bins": 0.5794713007771074, "calibration/confidence_entropy": 0.36403103435288126, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.3806335949122962, "calibration/mean_confidence": 0.7821886770336945, "calibration/prompt_uniqueness": 0.667976662783546, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.003515625, "completions/max_length": 1536.0, "completions/max_terminated_length": 1149.6, "completions/mean_length": 140.0291015625, "completions/mean_terminated_length": 135.11297149658202, "completions/min_length": 25.2, "completions/min_terminated_length": 25.2, "epoch": 0.064, "grad_norm": 0.00845023151487112, "learning_rate": 1e-06, "loss": 0.0121, "num_tokens": 68926534.0, "reward": 0.706857168674469, "reward_std": 0.20300790965557097, "rewards/accuracy_reward": 0.34228515625, "rewards/brier_reward": 0.5721023082733154, "rewards/confidence_uniqueness_reward": 0.7548041224479676, "rewards/format_reward": 0.9798828125, "rewards/frontier_aurc_reward": -0.0069354880601167675, "rewards/frontier_coverage_0": 0.06275556683540344, "rewards/frontier_coverage_1": 0.06275556683540344, "rewards/frontier_coverage_10": 0.06275556683540344, "rewards/frontier_coverage_15": 0.06275556683540344, "rewards/frontier_coverage_20": 0.06275556683540344, "rewards/frontier_coverage_25": 0.06275556683540344, "rewards/frontier_coverage_5": 0.06275556683540344, "rewards/frontier_ece_reward": -0.05814636992290616, "rewards/frontier_entropy_batch_reward": -0.8650725841522217, "signal/accuracy_reward/centered_abs_mean": 0.201666259765625, "signal/accuracy_reward/group_bin_occupancy": 0.206640625, "signal/accuracy_reward/group_std_mean": 0.25092312395572663, "signal/accuracy_reward/group_zero_std_frac": 0.346875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1008331298828125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.1008331298828125, "signal/advantage_abs_mean": 0.1547175496816635, "signal/advantage_pre_scale_abs_mean": 0.1547175496816635, "signal/advantage_pre_scale_std": 0.2182164669036865, "signal/advantage_std": 0.2182164669036865, "signal/brier_reward/centered_abs_mean": 0.2411728620529175, "signal/brier_reward/group_bin_occupancy": 0.828125, "signal/brier_reward/group_std_mean": 0.2961599349975586, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02411728650331497, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.02411728650331497, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.11858726739883423, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.6859375, "signal/confidence_uniqueness_reward/group_std_mean": 0.15527499318122864, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.011858727037906646, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.011858727037906646, "signal/format_reward/centered_abs_mean": 0.03802490234375, "signal/format_reward/group_bin_occupancy": 0.18125, "signal/format_reward/group_std_mean": 0.09205524399876594, "signal/format_reward/group_zero_std_frac": 0.55, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.019012451171875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.019012451171875, "signal/frontier_aurc_reward/centered_abs_mean": 0.004820964112877846, "signal/frontier_aurc_reward/group_bin_occupancy": 0.74609375, "signal/frontier_aurc_reward/group_std_mean": 0.00661336500197649, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.0262053739279506e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.0262053739279506e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.10514531433582305, "signal/frontier_coverage_0/group_bin_occupancy": 0.6875, "signal/frontier_coverage_0/group_std_mean": 0.16418030858039856, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0013143164571374655, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0013143164571374655, "signal/frontier_coverage_1/centered_abs_mean": 0.10514531433582305, "signal/frontier_coverage_1/group_bin_occupancy": 0.6875, "signal/frontier_coverage_1/group_std_mean": 0.16418030858039856, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0013143164571374655, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0013143164571374655, "signal/frontier_coverage_10/centered_abs_mean": 0.10514531433582305, "signal/frontier_coverage_10/group_bin_occupancy": 0.6875, "signal/frontier_coverage_10/group_std_mean": 0.16418030858039856, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0013143164571374655, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0013143164571374655, "signal/frontier_coverage_15/centered_abs_mean": 0.10514531433582305, "signal/frontier_coverage_15/group_bin_occupancy": 0.6875, "signal/frontier_coverage_15/group_std_mean": 0.16418030858039856, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013143164571374655, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013143164571374655, "signal/frontier_coverage_20/centered_abs_mean": 0.10514531433582305, "signal/frontier_coverage_20/group_bin_occupancy": 0.6875, "signal/frontier_coverage_20/group_std_mean": 0.16418030858039856, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013143164571374655, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013143164571374655, "signal/frontier_coverage_25/centered_abs_mean": 0.10514531433582305, "signal/frontier_coverage_25/group_bin_occupancy": 0.6875, "signal/frontier_coverage_25/group_std_mean": 0.16418030858039856, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013143164571374655, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013143164571374655, "signal/frontier_coverage_5/centered_abs_mean": 0.10514531433582305, "signal/frontier_coverage_5/group_bin_occupancy": 0.6875, "signal/frontier_coverage_5/group_std_mean": 0.16418030858039856, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0013143164571374655, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0013143164571374655, "signal/frontier_ece_reward/centered_abs_mean": 0.1306596964597702, "signal/frontier_ece_reward/group_bin_occupancy": 0.674609375, "signal/frontier_ece_reward/group_std_mean": 0.16075450479984282, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.013065969571471214, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.013065969571471214, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.21918058693408965, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.380859375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3507233917713165, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.071875, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.021918059140443803, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.021918059140443803, "step": 20 }, { "calibration/aurc": 0.6159178774567231, "calibration/batch_distribution_entropy": 0.8289744360363516, "calibration/batch_entropy_100bins": 0.6255120655845456, "calibration/batch_entropy_10bins": 0.8289744360363516, "calibration/batch_entropy_50bins": 0.7153792756260304, "calibration/batch_uniqueness": 0.8485990399892221, "calibration/buffer_distribution_entropy": 0.6896777359021345, "calibration/buffer_entropy_100bins": 0.520473966835383, "calibration/buffer_entropy_10bins": 0.6896777359021345, "calibration/buffer_entropy_50bins": 0.6052089462001826, "calibration/confidence_entropy": 0.43118102390537255, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.3893414577098084, "calibration/mean_confidence": 0.7016207582801898, "calibration/prompt_uniqueness": 0.7763101453654324, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00087890625, "completions/max_length": 1536.0, "completions/max_terminated_length": 825.0, "completions/mean_length": 115.275390625, "completions/mean_terminated_length": 114.02607116699218, "completions/min_length": 29.0, "completions/min_terminated_length": 29.0, "epoch": 0.08, "grad_norm": 0.004183254204690456, "learning_rate": 1e-06, "loss": 0.0015, "num_tokens": 85040106.0, "reward": 0.7534348726272583, "reward_std": 0.18122220635414124, "rewards/accuracy_reward": 0.36455078125, "rewards/brier_reward": 0.6307034969329834, "rewards/confidence_uniqueness_reward": 0.847357702255249, "rewards/format_reward": 0.99326171875, "rewards/frontier_aurc_reward": -0.006123499572277069, "rewards/frontier_coverage_0": 0.07438097894191742, "rewards/frontier_coverage_1": 0.07438097894191742, "rewards/frontier_coverage_10": 0.07438097894191742, "rewards/frontier_coverage_15": 0.07438097894191742, "rewards/frontier_coverage_20": 0.07438097894191742, "rewards/frontier_coverage_25": 0.07438097894191742, "rewards/frontier_coverage_5": 0.07438097894191742, "rewards/frontier_ece_reward": -0.03736944012343883, "rewards/frontier_entropy_batch_reward": -0.7597236037254333, "signal/accuracy_reward/centered_abs_mean": 0.192083740234375, "signal/accuracy_reward/group_bin_occupancy": 0.206640625, "signal/accuracy_reward/group_std_mean": 0.2422287493944168, "signal/accuracy_reward/group_zero_std_frac": 0.346875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0960418701171875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0960418701171875, "signal/advantage_abs_mean": 0.14200334548950194, "signal/advantage_pre_scale_abs_mean": 0.14200334548950194, "signal/advantage_pre_scale_std": 0.19599647223949432, "signal/advantage_std": 0.19599647223949432, "signal/brier_reward/centered_abs_mean": 0.23228658139705657, "signal/brier_reward/group_bin_occupancy": 0.88359375, "signal/brier_reward/group_std_mean": 0.28496087789535524, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.023228658363223076, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.023228658363223076, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0650908425450325, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.745703125, "signal/confidence_uniqueness_reward/group_std_mean": 0.09223922342061996, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006509084347635507, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006509084347635507, "signal/format_reward/centered_abs_mean": 0.012933349609375, "signal/format_reward/group_bin_occupancy": 0.148046875, "signal/format_reward/group_std_mean": 0.03475438989698887, "signal/format_reward/group_zero_std_frac": 0.815625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0064666748046875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0064666748046875, "signal/frontier_aurc_reward/centered_abs_mean": 0.00406914739869535, "signal/frontier_aurc_reward/group_bin_occupancy": 0.739453125, "signal/frontier_aurc_reward/group_std_mean": 0.006008286867290736, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.0864344666479154e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.0864344666479154e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.15233553797006608, "signal/frontier_coverage_0/group_bin_occupancy": 0.795703125, "signal/frontier_coverage_0/group_std_mean": 0.22025286853313447, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0019041943131014704, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019041943131014704, "signal/frontier_coverage_1/centered_abs_mean": 0.15233553797006608, "signal/frontier_coverage_1/group_bin_occupancy": 0.795703125, "signal/frontier_coverage_1/group_std_mean": 0.22025286853313447, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019041943131014704, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019041943131014704, "signal/frontier_coverage_10/centered_abs_mean": 0.15233553797006608, "signal/frontier_coverage_10/group_bin_occupancy": 0.795703125, "signal/frontier_coverage_10/group_std_mean": 0.22025286853313447, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019041943131014704, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019041943131014704, "signal/frontier_coverage_15/centered_abs_mean": 0.15233553797006608, "signal/frontier_coverage_15/group_bin_occupancy": 0.795703125, "signal/frontier_coverage_15/group_std_mean": 0.22025286853313447, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0019041943131014704, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0019041943131014704, "signal/frontier_coverage_20/centered_abs_mean": 0.15233553797006608, "signal/frontier_coverage_20/group_bin_occupancy": 0.795703125, "signal/frontier_coverage_20/group_std_mean": 0.22025286853313447, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0019041943131014704, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0019041943131014704, "signal/frontier_coverage_25/centered_abs_mean": 0.15233553797006608, "signal/frontier_coverage_25/group_bin_occupancy": 0.795703125, "signal/frontier_coverage_25/group_std_mean": 0.22025286853313447, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0019041943131014704, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019041943131014704, "signal/frontier_coverage_5/centered_abs_mean": 0.15233553797006608, "signal/frontier_coverage_5/group_bin_occupancy": 0.795703125, "signal/frontier_coverage_5/group_std_mean": 0.22025286853313447, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0019041943131014704, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019041943131014704, "signal/frontier_ece_reward/centered_abs_mean": 0.11693819165229798, "signal/frontier_ece_reward/group_bin_occupancy": 0.7625, "signal/frontier_ece_reward/group_std_mean": 0.14299911260604858, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.011693819798529148, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.011693819798529148, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.34947873950004577, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.546875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.46870680451393126, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03494787439703941, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03494787439703941, "step": 25 }, { "calibration/aurc": 0.6253142563211475, "calibration/batch_distribution_entropy": 0.9522747813721082, "calibration/batch_entropy_100bins": 0.8099158365008308, "calibration/batch_entropy_10bins": 0.9522747813721082, "calibration/batch_entropy_50bins": 0.8759764029517731, "calibration/batch_uniqueness": 0.9186538181527467, "calibration/buffer_distribution_entropy": 0.7582834328496173, "calibration/buffer_entropy_100bins": 0.580513610120933, "calibration/buffer_entropy_10bins": 0.7582834328496173, "calibration/buffer_entropy_50bins": 0.6668539149358436, "calibration/confidence_entropy": 0.5189439527113646, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.006274509803921568, "calibration/coverage@30%": 0.006274509803921568, "calibration/coverage@5%": 0.0, "calibration/ece": 0.2600914235574404, "calibration/mean_confidence": 0.5131853527628318, "calibration/prompt_uniqueness": 0.8575710010279367, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0013671875, "completions/max_length": 1536.0, "completions/max_terminated_length": 549.0, "completions/mean_length": 111.59697265625, "completions/mean_terminated_length": 109.64572296142578, "completions/min_length": 35.8, "completions/min_terminated_length": 35.8, "epoch": 0.096, "grad_norm": 0.00540167186409235, "learning_rate": 1e-06, "loss": 0.0045, "num_tokens": 101227467.0, "reward": 0.8009824633598328, "reward_std": 0.1627292662858963, "rewards/accuracy_reward": 0.3607421875, "rewards/brier_reward": 0.6999990940093994, "rewards/confidence_uniqueness_reward": 0.9195750474929809, "rewards/format_reward": 0.99453125, "rewards/frontier_aurc_reward": -0.005178525112569332, "rewards/frontier_coverage_0": 0.11951842457056046, "rewards/frontier_coverage_1": 0.11951842457056046, "rewards/frontier_coverage_10": 0.11951842457056046, "rewards/frontier_coverage_15": 0.11951842457056046, "rewards/frontier_coverage_20": 0.11951842457056046, "rewards/frontier_coverage_25": 0.11951842457056046, "rewards/frontier_coverage_5": 0.11951842457056046, "rewards/frontier_ece_reward": -0.016483052633702755, "rewards/frontier_entropy_batch_reward": -0.47356472015380857, "signal/accuracy_reward/centered_abs_mean": 0.1861328125, "signal/accuracy_reward/group_bin_occupancy": 0.20390625, "signal/accuracy_reward/group_std_mean": 0.235753658413887, "signal/accuracy_reward/group_zero_std_frac": 0.36875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09306640625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09306640625, "signal/advantage_abs_mean": 0.12720103561878204, "signal/advantage_pre_scale_abs_mean": 0.12720103561878204, "signal/advantage_pre_scale_std": 0.1749451279640198, "signal/advantage_std": 0.1749451279640198, "signal/brier_reward/centered_abs_mean": 0.2198871850967407, "signal/brier_reward/group_bin_occupancy": 0.91796875, "signal/brier_reward/group_std_mean": 0.270095694065094, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02198871858417988, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.02198871858417988, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.047896023094654086, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.71328125, "signal/confidence_uniqueness_reward/group_std_mean": 0.07273447662591934, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004789602383971215, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004789602383971215, "signal/format_reward/centered_abs_mean": 0.010546875, "signal/format_reward/group_bin_occupancy": 0.1453125, "signal/format_reward/group_std_mean": 0.029590686410665513, "signal/format_reward/group_zero_std_frac": 0.8375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0052734375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0052734375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0024232265073806047, "signal/frontier_aurc_reward/group_bin_occupancy": 0.728125, "signal/frontier_aurc_reward/group_std_mean": 0.0038812434300780295, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.0290332506410778e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.0290332506410778e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.24059977233409882, "signal/frontier_coverage_0/group_bin_occupancy": 0.915625, "signal/frontier_coverage_0/group_std_mean": 0.30937875509262086, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0030074971728026867, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0030074971728026867, "signal/frontier_coverage_1/centered_abs_mean": 0.24059977233409882, "signal/frontier_coverage_1/group_bin_occupancy": 0.915625, "signal/frontier_coverage_1/group_std_mean": 0.30937875509262086, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0030074971728026867, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0030074971728026867, "signal/frontier_coverage_10/centered_abs_mean": 0.24059977233409882, "signal/frontier_coverage_10/group_bin_occupancy": 0.915625, "signal/frontier_coverage_10/group_std_mean": 0.30937875509262086, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0030074971728026867, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0030074971728026867, "signal/frontier_coverage_15/centered_abs_mean": 0.24059977233409882, "signal/frontier_coverage_15/group_bin_occupancy": 0.915625, "signal/frontier_coverage_15/group_std_mean": 0.30937875509262086, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0030074971728026867, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0030074971728026867, "signal/frontier_coverage_20/centered_abs_mean": 0.24059977233409882, "signal/frontier_coverage_20/group_bin_occupancy": 0.915625, "signal/frontier_coverage_20/group_std_mean": 0.30937875509262086, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0030074971728026867, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0030074971728026867, "signal/frontier_coverage_25/centered_abs_mean": 0.24059977233409882, "signal/frontier_coverage_25/group_bin_occupancy": 0.915625, "signal/frontier_coverage_25/group_std_mean": 0.30937875509262086, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0030074971728026867, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0030074971728026867, "signal/frontier_coverage_5/centered_abs_mean": 0.24059977233409882, "signal/frontier_coverage_5/group_bin_occupancy": 0.915625, "signal/frontier_coverage_5/group_std_mean": 0.30937875509262086, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0030074971728026867, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0030074971728026867, "signal/frontier_ece_reward/centered_abs_mean": 0.09630160331726074, "signal/frontier_ece_reward/group_bin_occupancy": 0.794140625, "signal/frontier_ece_reward/group_std_mean": 0.11887068897485734, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.009630160499364137, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.009630160499364137, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.45851866006851194, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.758984375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.5251657009124756, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.04585186541080475, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.04585186541080475, "step": 30 }, { "calibration/aurc": 0.5182934023322565, "calibration/batch_distribution_entropy": 0.9200624959435266, "calibration/batch_entropy_100bins": 0.9203410859236761, "calibration/batch_entropy_10bins": 0.9200624959435266, "calibration/batch_entropy_50bins": 0.9307206167012977, "calibration/batch_uniqueness": 0.9428976783143688, "calibration/buffer_distribution_entropy": 0.8451822558633515, "calibration/buffer_entropy_100bins": 0.6813660232509321, "calibration/buffer_entropy_10bins": 0.8451822558633515, "calibration/buffer_entropy_50bins": 0.7589156786572054, "calibration/confidence_entropy": 0.4994208357830646, "calibration/coverage@0%": 0.001573256191699024, "calibration/coverage@1%": 0.001573256191699024, "calibration/coverage@10%": 0.001573256191699024, "calibration/coverage@15%": 0.001573256191699024, "calibration/coverage@20%": 0.001573256191699024, "calibration/coverage@25%": 0.0023606577665021737, "calibration/coverage@30%": 0.009433349318565041, "calibration/coverage@5%": 0.001573256191699024, "calibration/ece": 0.19128083609792296, "calibration/mean_confidence": 0.3484087206325151, "calibration/prompt_uniqueness": 0.8813607651534859, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00205078125, "completions/max_length": 1536.0, "completions/max_terminated_length": 666.6, "completions/mean_length": 113.24580078125, "completions/mean_terminated_length": 110.32241821289062, "completions/min_length": 37.2, "completions/min_terminated_length": 37.2, "epoch": 0.112, "grad_norm": 0.004447246436029673, "learning_rate": 1e-06, "loss": 0.0067, "num_tokens": 117496576.0, "reward": 0.8329338908195496, "reward_std": 0.1337667301297188, "rewards/accuracy_reward": 0.3892578125, "rewards/brier_reward": 0.7226597905158997, "rewards/confidence_uniqueness_reward": 0.9372900366783142, "rewards/format_reward": 0.99619140625, "rewards/frontier_aurc_reward": -0.0045765116810798645, "rewards/frontier_coverage_0": 0.14142859876155853, "rewards/frontier_coverage_1": 0.14142859876155853, "rewards/frontier_coverage_10": 0.14142859876155853, "rewards/frontier_coverage_15": 0.14142859876155853, "rewards/frontier_coverage_20": 0.14142859876155853, "rewards/frontier_coverage_25": 0.14142859876155853, "rewards/frontier_coverage_5": 0.14142859876155853, "rewards/frontier_ece_reward": -0.00010334124672226608, "rewards/frontier_entropy_batch_reward": -0.3809317171573639, "signal/accuracy_reward/centered_abs_mean": 0.18907470703125, "signal/accuracy_reward/group_bin_occupancy": 0.205859375, "signal/accuracy_reward/group_std_mean": 0.23918051719665528, "signal/accuracy_reward/group_zero_std_frac": 0.353125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.094537353515625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.094537353515625, "signal/advantage_abs_mean": 0.10340933352708817, "signal/advantage_pre_scale_abs_mean": 0.10340933352708817, "signal/advantage_pre_scale_std": 0.14824790954589845, "signal/advantage_std": 0.14824790954589845, "signal/brier_reward/centered_abs_mean": 0.1997154474258423, "signal/brier_reward/group_bin_occupancy": 0.876171875, "signal/brier_reward/group_std_mean": 0.251619490981102, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019971545413136484, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.019971545413136484, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.030846378952264785, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.85234375, "signal/confidence_uniqueness_reward/group_std_mean": 0.049375799298286435, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0030846379697322844, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0030846379697322844, "signal/format_reward/centered_abs_mean": 0.007379150390625, "signal/format_reward/group_bin_occupancy": 0.140234375, "signal/format_reward/group_std_mean": 0.02154465951025486, "signal/format_reward/group_zero_std_frac": 0.878125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0036895751953125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0036895751953125, "signal/frontier_aurc_reward/centered_abs_mean": 0.0013135876040905714, "signal/frontier_aurc_reward/group_bin_occupancy": 0.71875, "signal/frontier_aurc_reward/group_std_mean": 0.002168184705078602, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.641984490561299e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.641984490561299e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.3162419438362122, "signal/frontier_coverage_0/group_bin_occupancy": 0.9390625, "signal/frontier_coverage_0/group_std_mean": 0.3914815127849579, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003953024419024587, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003953024419024587, "signal/frontier_coverage_1/centered_abs_mean": 0.3162419438362122, "signal/frontier_coverage_1/group_bin_occupancy": 0.9390625, "signal/frontier_coverage_1/group_std_mean": 0.3914815127849579, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003953024419024587, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003953024419024587, "signal/frontier_coverage_10/centered_abs_mean": 0.3162419438362122, "signal/frontier_coverage_10/group_bin_occupancy": 0.9390625, "signal/frontier_coverage_10/group_std_mean": 0.3914815127849579, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003953024419024587, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003953024419024587, "signal/frontier_coverage_15/centered_abs_mean": 0.3162419438362122, "signal/frontier_coverage_15/group_bin_occupancy": 0.9390625, "signal/frontier_coverage_15/group_std_mean": 0.3914815127849579, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003953024419024587, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003953024419024587, "signal/frontier_coverage_20/centered_abs_mean": 0.3162419438362122, "signal/frontier_coverage_20/group_bin_occupancy": 0.9390625, "signal/frontier_coverage_20/group_std_mean": 0.3914815127849579, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003953024419024587, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003953024419024587, "signal/frontier_coverage_25/centered_abs_mean": 0.3162419438362122, "signal/frontier_coverage_25/group_bin_occupancy": 0.9390625, "signal/frontier_coverage_25/group_std_mean": 0.3914815127849579, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003953024419024587, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003953024419024587, "signal/frontier_coverage_5/centered_abs_mean": 0.3162419438362122, "signal/frontier_coverage_5/group_bin_occupancy": 0.9390625, "signal/frontier_coverage_5/group_std_mean": 0.3914815127849579, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003953024419024587, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003953024419024587, "signal/frontier_ece_reward/centered_abs_mean": 0.052671823650598526, "signal/frontier_ece_reward/group_bin_occupancy": 0.6921875, "signal/frontier_ece_reward/group_std_mean": 0.07692344933748245, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005267182365059853, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005267182365059853, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.4283927083015442, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.822265625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.49359052777290346, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.04283927157521248, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.04283927157521248, "step": 35 }, { "calibration/aurc": 0.5740054027668963, "calibration/batch_distribution_entropy": 0.9066430352384233, "calibration/batch_entropy_100bins": 0.922959840976052, "calibration/batch_entropy_10bins": 0.9066430352384233, "calibration/batch_entropy_50bins": 0.9289300475616054, "calibration/batch_uniqueness": 0.9406546241127016, "calibration/buffer_distribution_entropy": 0.9044162694042044, "calibration/buffer_entropy_100bins": 0.7619221563316753, "calibration/buffer_entropy_10bins": 0.9044162694042044, "calibration/buffer_entropy_50bins": 0.8276472839940714, "calibration/confidence_entropy": 0.5025416372663043, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.18815817750495797, "calibration/mean_confidence": 0.33185162881752184, "calibration/prompt_uniqueness": 0.8810801774861978, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0017578125, "completions/max_length": 1536.0, "completions/max_terminated_length": 758.6, "completions/mean_length": 120.3833984375, "completions/mean_terminated_length": 117.88789825439453, "completions/min_length": 42.0, "completions/min_terminated_length": 42.0, "epoch": 0.128, "grad_norm": 0.00149272452108562, "learning_rate": 1e-06, "loss": 0.0059, "num_tokens": 133645974.0, "reward": 0.8357627868652344, "reward_std": 0.12221252173185349, "rewards/accuracy_reward": 0.3927734375, "rewards/brier_reward": 0.7258547782897949, "rewards/confidence_uniqueness_reward": 0.9388667583465576, "rewards/format_reward": 0.99716796875, "rewards/frontier_aurc_reward": -0.004377355705946684, "rewards/frontier_coverage_0": 0.13741703778505326, "rewards/frontier_coverage_1": 0.13741703778505326, "rewards/frontier_coverage_10": 0.13741703778505326, "rewards/frontier_coverage_15": 0.13741703778505326, "rewards/frontier_coverage_20": 0.13741703778505326, "rewards/frontier_coverage_25": 0.13741703778505326, "rewards/frontier_coverage_5": 0.13741703778505326, "rewards/frontier_ece_reward": 0.003349437890574336, "rewards/frontier_entropy_batch_reward": -0.3798429071903229, "signal/accuracy_reward/centered_abs_mean": 0.17041015625, "signal/accuracy_reward/group_bin_occupancy": 0.2015625, "signal/accuracy_reward/group_std_mean": 0.2198496311903, "signal/accuracy_reward/group_zero_std_frac": 0.3875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.085205078125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.085205078125, "signal/advantage_abs_mean": 0.09425371885299683, "signal/advantage_pre_scale_abs_mean": 0.09425371885299683, "signal/advantage_pre_scale_std": 0.13775794506072997, "signal/advantage_std": 0.13775794506072997, "signal/brier_reward/centered_abs_mean": 0.18953997492790223, "signal/brier_reward/group_bin_occupancy": 0.882421875, "signal/brier_reward/group_std_mean": 0.23842448592185975, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018953998014330863, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.018953998014330863, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.025395025685429572, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.911328125, "signal/confidence_uniqueness_reward/group_std_mean": 0.03915891274809837, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002539502549916506, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002539502549916506, "signal/format_reward/centered_abs_mean": 0.005462646484375, "signal/format_reward/group_bin_occupancy": 0.135546875, "signal/format_reward/group_std_mean": 0.015347770974040031, "signal/format_reward/group_zero_std_frac": 0.915625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0027313232421875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0027313232421875, "signal/frontier_aurc_reward/centered_abs_mean": 0.001331974472850561, "signal/frontier_aurc_reward/group_bin_occupancy": 0.783203125, "signal/frontier_aurc_reward/group_std_mean": 0.0020305470563471316, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.6649681492708622e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.6649681492708622e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.3050907075405121, "signal/frontier_coverage_0/group_bin_occupancy": 0.938671875, "signal/frontier_coverage_0/group_std_mean": 0.37571829557418823, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0038136340212076903, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0038136340212076903, "signal/frontier_coverage_1/centered_abs_mean": 0.3050907075405121, "signal/frontier_coverage_1/group_bin_occupancy": 0.938671875, "signal/frontier_coverage_1/group_std_mean": 0.37571829557418823, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0038136340212076903, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0038136340212076903, "signal/frontier_coverage_10/centered_abs_mean": 0.3050907075405121, "signal/frontier_coverage_10/group_bin_occupancy": 0.938671875, "signal/frontier_coverage_10/group_std_mean": 0.37571829557418823, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0038136340212076903, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0038136340212076903, "signal/frontier_coverage_15/centered_abs_mean": 0.3050907075405121, "signal/frontier_coverage_15/group_bin_occupancy": 0.938671875, "signal/frontier_coverage_15/group_std_mean": 0.37571829557418823, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0038136340212076903, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0038136340212076903, "signal/frontier_coverage_20/centered_abs_mean": 0.3050907075405121, "signal/frontier_coverage_20/group_bin_occupancy": 0.938671875, "signal/frontier_coverage_20/group_std_mean": 0.37571829557418823, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0038136340212076903, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0038136340212076903, "signal/frontier_coverage_25/centered_abs_mean": 0.3050907075405121, "signal/frontier_coverage_25/group_bin_occupancy": 0.938671875, "signal/frontier_coverage_25/group_std_mean": 0.37571829557418823, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0038136340212076903, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0038136340212076903, "signal/frontier_coverage_5/centered_abs_mean": 0.3050907075405121, "signal/frontier_coverage_5/group_bin_occupancy": 0.938671875, "signal/frontier_coverage_5/group_std_mean": 0.37571829557418823, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0038136340212076903, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0038136340212076903, "signal/frontier_ece_reward/centered_abs_mean": 0.048742403835058214, "signal/frontier_ece_reward/group_bin_occupancy": 0.72265625, "signal/frontier_ece_reward/group_std_mean": 0.07104799449443817, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004874240513890982, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004874240513890982, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.40303301215171816, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.857421875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4729976952075958, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.040303300321102145, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.040303300321102145, "step": 40 }, { "calibration/aurc": 0.38396500517480436, "calibration/batch_distribution_entropy": 0.967181094225583, "calibration/batch_entropy_100bins": 0.9534893860190756, "calibration/batch_entropy_10bins": 0.967181094225583, "calibration/batch_entropy_50bins": 0.9683228737901771, "calibration/batch_uniqueness": 0.9549240591535882, "calibration/buffer_distribution_entropy": 0.9348020467410049, "calibration/buffer_entropy_100bins": 0.8141507430697466, "calibration/buffer_entropy_10bins": 0.9348020467410049, "calibration/buffer_entropy_50bins": 0.8695878210283782, "calibration/confidence_entropy": 0.5490850423679848, "calibration/coverage@0%": 0.0027389615949119372, "calibration/coverage@1%": 0.0027389615949119372, "calibration/coverage@10%": 0.005087298189823874, "calibration/coverage@15%": 0.06289979818982387, "calibration/coverage@20%": 0.11407167318982388, "calibration/coverage@25%": 0.21096043297455966, "calibration/coverage@30%": 0.21800085616438353, "calibration/coverage@5%": 0.0027389615949119372, "calibration/ece": 0.20184070224738887, "calibration/mean_confidence": 0.45353375496514436, "calibration/prompt_uniqueness": 0.8953690003902185, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00078125, "completions/max_length": 1536.0, "completions/max_terminated_length": 592.2, "completions/mean_length": 131.33984375, "completions/mean_terminated_length": 130.2405517578125, "completions/min_length": 46.2, "completions/min_terminated_length": 46.2, "epoch": 0.144, "grad_norm": 0.0017877706559374928, "learning_rate": 1e-06, "loss": 0.0037, "num_tokens": 149941326.0, "reward": 0.9030540823936463, "reward_std": 0.13035276234149934, "rewards/accuracy_reward": 0.50322265625, "rewards/brier_reward": 0.7159843802452087, "rewards/confidence_uniqueness_reward": 0.9531710863113403, "rewards/format_reward": 0.9986328125, "rewards/frontier_aurc_reward": -0.0038798670284450054, "rewards/frontier_coverage_0": 0.033135686349123714, "rewards/frontier_coverage_1": 0.033135686349123714, "rewards/frontier_coverage_10": 0.033135686349123714, "rewards/frontier_coverage_15": 0.033135686349123714, "rewards/frontier_coverage_20": 0.033135686349123714, "rewards/frontier_coverage_25": 0.033135686349123714, "rewards/frontier_coverage_5": 0.033135686349123714, "rewards/frontier_ece_reward": 0.010891084442846477, "rewards/frontier_entropy_batch_reward": -0.18729186952114105, "signal/accuracy_reward/centered_abs_mean": 0.165521240234375, "signal/accuracy_reward/group_bin_occupancy": 0.2, "signal/accuracy_reward/group_std_mean": 0.21491027772426605, "signal/accuracy_reward/group_zero_std_frac": 0.4, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0827606201171875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0827606201171875, "signal/advantage_abs_mean": 0.10254481285810471, "signal/advantage_pre_scale_abs_mean": 0.10254481285810471, "signal/advantage_pre_scale_std": 0.1436397671699524, "signal/advantage_std": 0.1436397671699524, "signal/brier_reward/centered_abs_mean": 0.19424692094326018, "signal/brier_reward/group_bin_occupancy": 0.930078125, "signal/brier_reward/group_std_mean": 0.24112644791603088, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019424692168831824, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.019424692168831824, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.01740786787122488, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.93984375, "signal/confidence_uniqueness_reward/group_std_mean": 0.02573142237961292, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0017407866893336178, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0017407866893336178, "signal/format_reward/centered_abs_mean": 0.00264892578125, "signal/format_reward/group_bin_occupancy": 0.13046875, "signal/format_reward/group_std_mean": 0.007733980286866426, "signal/format_reward/group_zero_std_frac": 0.95625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.001324462890625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.001324462890625, "signal/frontier_aurc_reward/centered_abs_mean": 0.0018934236606583, "signal/frontier_aurc_reward/group_bin_occupancy": 0.7984375, "signal/frontier_aurc_reward/group_std_mean": 0.0027514519169926643, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.366779626754578e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.366779626754578e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.2569372057914734, "signal/frontier_coverage_0/group_bin_occupancy": 0.95078125, "signal/frontier_coverage_0/group_std_mean": 0.3219131588935852, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00321171497926116, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00321171497926116, "signal/frontier_coverage_1/centered_abs_mean": 0.2569372057914734, "signal/frontier_coverage_1/group_bin_occupancy": 0.95078125, "signal/frontier_coverage_1/group_std_mean": 0.3219131588935852, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00321171497926116, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00321171497926116, "signal/frontier_coverage_10/centered_abs_mean": 0.2569372057914734, "signal/frontier_coverage_10/group_bin_occupancy": 0.95078125, "signal/frontier_coverage_10/group_std_mean": 0.3219131588935852, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00321171497926116, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00321171497926116, "signal/frontier_coverage_15/centered_abs_mean": 0.2569372057914734, "signal/frontier_coverage_15/group_bin_occupancy": 0.95078125, "signal/frontier_coverage_15/group_std_mean": 0.3219131588935852, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00321171497926116, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00321171497926116, "signal/frontier_coverage_20/centered_abs_mean": 0.2569372057914734, "signal/frontier_coverage_20/group_bin_occupancy": 0.95078125, "signal/frontier_coverage_20/group_std_mean": 0.3219131588935852, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00321171497926116, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00321171497926116, "signal/frontier_coverage_25/centered_abs_mean": 0.2569372057914734, "signal/frontier_coverage_25/group_bin_occupancy": 0.95078125, "signal/frontier_coverage_25/group_std_mean": 0.3219131588935852, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00321171497926116, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00321171497926116, "signal/frontier_coverage_5/centered_abs_mean": 0.2569372057914734, "signal/frontier_coverage_5/group_bin_occupancy": 0.95078125, "signal/frontier_coverage_5/group_std_mean": 0.3219131588935852, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00321171497926116, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00321171497926116, "signal/frontier_ece_reward/centered_abs_mean": 0.0656904973089695, "signal/frontier_ece_reward/group_bin_occupancy": 0.807421875, "signal/frontier_ece_reward/group_std_mean": 0.08761606812477112, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0065690501593053344, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0065690501593053344, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27559973001480104, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.846484375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3480221152305603, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.027559973299503326, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027559973299503326, "step": 45 }, { "calibration/aurc": 0.44252966701627444, "calibration/batch_distribution_entropy": 0.9766154522720358, "calibration/batch_entropy_100bins": 0.9551690729484854, "calibration/batch_entropy_10bins": 0.9766154522720358, "calibration/batch_entropy_50bins": 0.9720948064609075, "calibration/batch_uniqueness": 0.9577404992006823, "calibration/buffer_distribution_entropy": 0.9504300762276795, "calibration/buffer_entropy_100bins": 0.8514806183722019, "calibration/buffer_entropy_10bins": 0.9504300762276795, "calibration/buffer_entropy_50bins": 0.8980914859901915, "calibration/confidence_entropy": 0.537290060462475, "calibration/coverage@0%": 0.002737435727715744, "calibration/coverage@1%": 0.002737435727715744, "calibration/coverage@10%": 0.00625994062008365, "calibration/coverage@15%": 0.00625994062008365, "calibration/coverage@20%": 0.010564459944936879, "calibration/coverage@25%": 0.021541782380760523, "calibration/coverage@30%": 0.04388704925463336, "calibration/coverage@5%": 0.002737435727715744, "calibration/ece": 0.14565709539204225, "calibration/mean_confidence": 0.5353336754902873, "calibration/prompt_uniqueness": 0.8973251380669008, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00048828125, "completions/max_length": 1338.4, "completions/max_terminated_length": 639.8, "completions/mean_length": 139.0447265625, "completions/mean_terminated_length": 138.36163940429688, "completions/min_length": 53.8, "completions/min_terminated_length": 53.8, "epoch": 0.16, "grad_norm": 0.0017825138056650758, "learning_rate": 1e-06, "loss": 0.0007, "num_tokens": 166386072.0, "reward": 0.8922484874725342, "reward_std": 0.13068339228630066, "rewards/accuracy_reward": 0.46005859375, "rewards/brier_reward": 0.7271055817604065, "rewards/confidence_uniqueness_reward": 0.9578215956687928, "rewards/format_reward": 0.99931640625, "rewards/frontier_aurc_reward": -0.004173227492719889, "rewards/frontier_coverage_0": 0.07299970909953117, "rewards/frontier_coverage_1": 0.07299970909953117, "rewards/frontier_coverage_10": 0.07299970909953117, "rewards/frontier_coverage_15": 0.07299970909953117, "rewards/frontier_coverage_20": 0.07299970909953117, "rewards/frontier_coverage_25": 0.07299970909953117, "rewards/frontier_coverage_5": 0.07299970909953117, "rewards/frontier_ece_reward": 0.011208084784448147, "rewards/frontier_entropy_batch_reward": -0.13387853503227234, "signal/accuracy_reward/centered_abs_mean": 0.156707763671875, "signal/accuracy_reward/group_bin_occupancy": 0.19375, "signal/accuracy_reward/group_std_mean": 0.20061389803886415, "signal/accuracy_reward/group_zero_std_frac": 0.45, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0783538818359375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0783538818359375, "signal/advantage_abs_mean": 0.10416142642498016, "signal/advantage_pre_scale_abs_mean": 0.10416142642498016, "signal/advantage_pre_scale_std": 0.14782364070415496, "signal/advantage_std": 0.14782364070415496, "signal/brier_reward/centered_abs_mean": 0.19167569279670715, "signal/brier_reward/group_bin_occupancy": 0.9265625, "signal/brier_reward/group_std_mean": 0.23840481042861938, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019167570024728776, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.019167570024728776, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.013879508711397648, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.948828125, "signal/confidence_uniqueness_reward/group_std_mean": 0.018870834633708, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013879508711397647, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013879508711397647, "signal/format_reward/centered_abs_mean": 0.001312255859375, "signal/format_reward/group_bin_occupancy": 0.12734375, "signal/format_reward/group_std_mean": 0.0035306816454976795, "signal/format_reward/group_zero_std_frac": 0.98125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006561279296875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0006561279296875, "signal/frontier_aurc_reward/centered_abs_mean": 0.0025133413262665273, "signal/frontier_aurc_reward/group_bin_occupancy": 0.814453125, "signal/frontier_aurc_reward/group_std_mean": 0.003546137036755681, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.141676788800396e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.141676788800396e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.21136297285556793, "signal/frontier_coverage_0/group_bin_occupancy": 0.922265625, "signal/frontier_coverage_0/group_std_mean": 0.2744253635406494, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002642037160694599, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002642037160694599, "signal/frontier_coverage_1/centered_abs_mean": 0.21136297285556793, "signal/frontier_coverage_1/group_bin_occupancy": 0.922265625, "signal/frontier_coverage_1/group_std_mean": 0.2744253635406494, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002642037160694599, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002642037160694599, "signal/frontier_coverage_10/centered_abs_mean": 0.21136297285556793, "signal/frontier_coverage_10/group_bin_occupancy": 0.922265625, "signal/frontier_coverage_10/group_std_mean": 0.2744253635406494, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002642037160694599, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002642037160694599, "signal/frontier_coverage_15/centered_abs_mean": 0.21136297285556793, "signal/frontier_coverage_15/group_bin_occupancy": 0.922265625, "signal/frontier_coverage_15/group_std_mean": 0.2744253635406494, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002642037160694599, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002642037160694599, "signal/frontier_coverage_20/centered_abs_mean": 0.21136297285556793, "signal/frontier_coverage_20/group_bin_occupancy": 0.922265625, "signal/frontier_coverage_20/group_std_mean": 0.2744253635406494, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002642037160694599, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002642037160694599, "signal/frontier_coverage_25/centered_abs_mean": 0.21136297285556793, "signal/frontier_coverage_25/group_bin_occupancy": 0.922265625, "signal/frontier_coverage_25/group_std_mean": 0.2744253635406494, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002642037160694599, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002642037160694599, "signal/frontier_coverage_5/centered_abs_mean": 0.21136297285556793, "signal/frontier_coverage_5/group_bin_occupancy": 0.922265625, "signal/frontier_coverage_5/group_std_mean": 0.2744253635406494, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002642037160694599, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002642037160694599, "signal/frontier_ece_reward/centered_abs_mean": 0.07558847218751907, "signal/frontier_ece_reward/group_bin_occupancy": 0.81328125, "signal/frontier_ece_reward/group_std_mean": 0.09702952355146408, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0075588468462228775, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0075588468462228775, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.21678448021411895, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.81953125, "signal/frontier_entropy_batch_reward/group_std_mean": 0.2975514531135559, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.021678448468446732, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.021678448468446732, "step": 50 }, { "epoch": 0.16, "eval_calibration/aurc": 0.5640482398747311, "eval_calibration/batch_distribution_entropy": 0.9338984167163116, "eval_calibration/batch_entropy_100bins": 0.6999477876337099, "eval_calibration/batch_entropy_10bins": 0.9338984167163116, "eval_calibration/batch_entropy_50bins": 0.796822593270633, "eval_calibration/batch_uniqueness": 0.9091796875, "eval_calibration/buffer_distribution_entropy": 0.9562562919532525, "eval_calibration/buffer_entropy_100bins": 0.8692259796843912, "eval_calibration/buffer_entropy_10bins": 0.9562562919532525, "eval_calibration/buffer_entropy_50bins": 0.9110833803907854, "eval_calibration/confidence_entropy": 0.5144369135960669, "eval_calibration/coverage@0%": 0.0078125, "eval_calibration/coverage@1%": 0.0078125, "eval_calibration/coverage@10%": 0.0078125, "eval_calibration/coverage@15%": 0.0078125, "eval_calibration/coverage@20%": 0.046875, "eval_calibration/coverage@25%": 0.09375, "eval_calibration/coverage@30%": 0.09375, "eval_calibration/coverage@5%": 0.0078125, "eval_calibration/ece": 0.28908419881089126, "eval_calibration/mean_confidence": 0.5618975390120369, "eval_calibration/prompt_uniqueness": 0.9091796875, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 330.5, "eval_completions/max_terminated_length": 330.5, "eval_completions/mean_length": 145.39965057373047, "eval_completions/mean_terminated_length": 145.39965057373047, "eval_completions/min_length": 69.75, "eval_completions/min_terminated_length": 69.75, "eval_loss": 0.0, "eval_num_tokens": 166386072.0, "eval_reward": 0.78005750477314, "eval_reward_std": 0.24558523669838905, "eval_rewards/accuracy_reward": 0.341796875, "eval_rewards/brier_reward": 0.7065591365098953, "eval_rewards/confidence_uniqueness_reward": 0.90478515625, "eval_rewards/format_reward": 1.0, "eval_rewards/frontier_aurc_reward": -0.005420909612439573, "eval_rewards/frontier_coverage_0": 0.13509072735905647, "eval_rewards/frontier_coverage_1": 0.13509072735905647, "eval_rewards/frontier_coverage_10": 0.13509072735905647, "eval_rewards/frontier_coverage_15": 0.13509072735905647, "eval_rewards/frontier_coverage_20": 0.13509072735905647, "eval_rewards/frontier_coverage_25": 0.13509072735905647, "eval_rewards/frontier_coverage_5": 0.13509072735905647, "eval_rewards/frontier_ece_reward": -0.00845025188755244, "eval_rewards/frontier_entropy_batch_reward": -0.6288299560546875, "eval_runtime": 18.697, "eval_samples_per_second": 26.742, "eval_signal/accuracy_reward/centered_abs_mean": 0.4388427734375, "eval_signal/accuracy_reward/group_bin_occupancy": 0.25, "eval_signal/accuracy_reward/group_std_mean": 0.4750789478421211, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.21942138671875, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.21942138671875, "eval_signal/advantage_abs_mean": 0.2188284732401371, "eval_signal/advantage_pre_scale_abs_mean": 0.2188284732401371, "eval_signal/advantage_pre_scale_std": 0.24322915077209473, "eval_signal/advantage_std": 0.24322915077209473, "eval_signal/brier_reward/centered_abs_mean": 0.2244720533490181, "eval_signal/brier_reward/group_bin_occupancy": 0.9375, "eval_signal/brier_reward/group_std_mean": 0.27552034705877304, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.022447205148637295, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.022447205148637295, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0378265380859375, "eval_signal/confidence_uniqueness_reward/group_bin_occupancy": 0.359375, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.04345181304961443, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003782653948292136, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003782653948292136, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_bin_occupancy": 0.125, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.004264666116796434, "eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.8828125, "eval_signal/frontier_aurc_reward/group_std_mean": 0.005977678927592933, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.3308327551349066e-05, "eval_signal/frontier_aurc_reward/weight": 0.012500000186264515, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.3308327551349066e-05, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.24422482028603554, "eval_signal/frontier_coverage_0/group_bin_occupancy": 0.9765625, "eval_signal/frontier_coverage_0/group_std_mean": 0.3201175183057785, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0030528103816322982, "eval_signal/frontier_coverage_0/weight": 0.012500000186264515, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0030528103816322982, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.24422482028603554, "eval_signal/frontier_coverage_1/group_bin_occupancy": 0.9765625, "eval_signal/frontier_coverage_1/group_std_mean": 0.3201175183057785, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0030528103816322982, "eval_signal/frontier_coverage_1/weight": 0.012500000186264515, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0030528103816322982, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.24422482028603554, "eval_signal/frontier_coverage_10/group_bin_occupancy": 0.9765625, "eval_signal/frontier_coverage_10/group_std_mean": 0.3201175183057785, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0030528103816322982, "eval_signal/frontier_coverage_10/weight": 0.012500000186264515, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0030528103816322982, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.24422482028603554, "eval_signal/frontier_coverage_15/group_bin_occupancy": 0.9765625, "eval_signal/frontier_coverage_15/group_std_mean": 0.3201175183057785, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0030528103816322982, "eval_signal/frontier_coverage_15/weight": 0.012500000186264515, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0030528103816322982, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.24422482028603554, "eval_signal/frontier_coverage_20/group_bin_occupancy": 0.9765625, "eval_signal/frontier_coverage_20/group_std_mean": 0.3201175183057785, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0030528103816322982, "eval_signal/frontier_coverage_20/weight": 0.012500000186264515, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0030528103816322982, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.24422482028603554, "eval_signal/frontier_coverage_25/group_bin_occupancy": 0.9765625, "eval_signal/frontier_coverage_25/group_std_mean": 0.3201175183057785, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0030528103816322982, "eval_signal/frontier_coverage_25/weight": 0.012500000186264515, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0030528103816322982, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.24422482028603554, "eval_signal/frontier_coverage_5/group_bin_occupancy": 0.9765625, "eval_signal/frontier_coverage_5/group_std_mean": 0.3201175183057785, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0030528103816322982, "eval_signal/frontier_coverage_5/weight": 0.012500000186264515, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0030528103816322982, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.09522267617285252, "eval_signal/frontier_ece_reward/group_bin_occupancy": 0.8671875, "eval_signal/frontier_ece_reward/group_std_mean": 0.12967629730701447, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.009522267850115895, "eval_signal/frontier_ece_reward/weight": 0.10000000149011612, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.009522267850115895, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3184318542480469, "eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.25, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.3345780223608017, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03184318542480469, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03184318542480469, "eval_steps_per_second": 0.214, "step": 50 }, { "calibration/aurc": 0.42845597095272925, "calibration/batch_distribution_entropy": 0.9743751131787548, "calibration/batch_entropy_100bins": 0.9510459343045478, "calibration/batch_entropy_10bins": 0.9743751131787548, "calibration/batch_entropy_50bins": 0.9671359424561432, "calibration/batch_uniqueness": 0.9603113397334159, "calibration/buffer_distribution_entropy": 0.9585476127536652, "calibration/buffer_entropy_100bins": 0.8791153575811542, "calibration/buffer_entropy_10bins": 0.9585476127536652, "calibration/buffer_entropy_50bins": 0.9184203673259261, "calibration/confidence_entropy": 0.49344108216528165, "calibration/coverage@0%": 0.00078125, "calibration/coverage@1%": 0.00078125, "calibration/coverage@10%": 0.00078125, "calibration/coverage@15%": 0.00078125, "calibration/coverage@20%": 0.00078125, "calibration/coverage@25%": 0.00078125, "calibration/coverage@30%": 0.06173938967710372, "calibration/coverage@5%": 0.00078125, "calibration/ece": 0.19620665052494485, "calibration/mean_confidence": 0.5846989709145939, "calibration/prompt_uniqueness": 0.8889120524681321, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 665.2, "completions/max_terminated_length": 458.2, "completions/mean_length": 146.85908203125, "completions/mean_terminated_length": 146.58725280761718, "completions/min_length": 54.4, "completions/min_terminated_length": 54.4, "epoch": 0.176, "grad_norm": 0.0016376320272684097, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 183127029.0, "reward": 0.8913762331008911, "reward_std": 0.13524161875247956, "rewards/accuracy_reward": 0.4572265625, "rewards/brier_reward": 0.7286172389984131, "rewards/confidence_uniqueness_reward": 0.9600118160247803, "rewards/format_reward": 0.9994140625, "rewards/frontier_aurc_reward": -0.004195977700874209, "rewards/frontier_coverage_0": 0.09039346724748612, "rewards/frontier_coverage_1": 0.09039346724748612, "rewards/frontier_coverage_10": 0.09039346724748612, "rewards/frontier_coverage_15": 0.09039346724748612, "rewards/frontier_coverage_20": 0.09039346724748612, "rewards/frontier_coverage_25": 0.09039346724748612, "rewards/frontier_coverage_5": 0.09039346724748612, "rewards/frontier_ece_reward": 0.01440376602113247, "rewards/frontier_entropy_batch_reward": -0.1510435476899147, "signal/accuracy_reward/centered_abs_mean": 0.1582275390625, "signal/accuracy_reward/group_bin_occupancy": 0.194140625, "signal/accuracy_reward/group_std_mean": 0.2026852160692215, "signal/accuracy_reward/group_zero_std_frac": 0.446875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07911376953125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07911376953125, "signal/advantage_abs_mean": 0.10643114447593689, "signal/advantage_pre_scale_abs_mean": 0.10643114447593689, "signal/advantage_pre_scale_std": 0.1524705171585083, "signal/advantage_std": 0.1524705171585083, "signal/brier_reward/centered_abs_mean": 0.19858744144439697, "signal/brier_reward/group_bin_occupancy": 0.90390625, "signal/brier_reward/group_std_mean": 0.24619080722332, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019858743995428085, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.019858743995428085, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.013402053527534008, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.914453125, "signal/confidence_uniqueness_reward/group_std_mean": 0.018923624232411385, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013402053853496909, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013402053853496909, "signal/format_reward/centered_abs_mean": 0.00113525390625, "signal/format_reward/group_bin_occupancy": 0.12734375, "signal/format_reward/group_std_mean": 0.0033145629335194827, "signal/format_reward/group_zero_std_frac": 0.98125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000567626953125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000567626953125, "signal/frontier_aurc_reward/centered_abs_mean": 0.0029607733245939015, "signal/frontier_aurc_reward/group_bin_occupancy": 0.816796875, "signal/frontier_aurc_reward/group_std_mean": 0.004161783494055271, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.700966844917275e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.700966844917275e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.20848225355148314, "signal/frontier_coverage_0/group_bin_occupancy": 0.898046875, "signal/frontier_coverage_0/group_std_mean": 0.27132275700569153, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0026060281787067653, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0026060281787067653, "signal/frontier_coverage_1/centered_abs_mean": 0.20848225355148314, "signal/frontier_coverage_1/group_bin_occupancy": 0.898046875, "signal/frontier_coverage_1/group_std_mean": 0.27132275700569153, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026060281787067653, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026060281787067653, "signal/frontier_coverage_10/centered_abs_mean": 0.20848225355148314, "signal/frontier_coverage_10/group_bin_occupancy": 0.898046875, "signal/frontier_coverage_10/group_std_mean": 0.27132275700569153, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026060281787067653, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026060281787067653, "signal/frontier_coverage_15/centered_abs_mean": 0.20848225355148314, "signal/frontier_coverage_15/group_bin_occupancy": 0.898046875, "signal/frontier_coverage_15/group_std_mean": 0.27132275700569153, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0026060281787067653, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026060281787067653, "signal/frontier_coverage_20/centered_abs_mean": 0.20848225355148314, "signal/frontier_coverage_20/group_bin_occupancy": 0.898046875, "signal/frontier_coverage_20/group_std_mean": 0.27132275700569153, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0026060281787067653, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0026060281787067653, "signal/frontier_coverage_25/centered_abs_mean": 0.20848225355148314, "signal/frontier_coverage_25/group_bin_occupancy": 0.898046875, "signal/frontier_coverage_25/group_std_mean": 0.27132275700569153, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0026060281787067653, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0026060281787067653, "signal/frontier_coverage_5/centered_abs_mean": 0.20848225355148314, "signal/frontier_coverage_5/group_bin_occupancy": 0.898046875, "signal/frontier_coverage_5/group_std_mean": 0.27132275700569153, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026060281787067653, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026060281787067653, "signal/frontier_ece_reward/centered_abs_mean": 0.07936635911464691, "signal/frontier_ece_reward/group_bin_occupancy": 0.798828125, "signal/frontier_ece_reward/group_std_mean": 0.10026746243238449, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.007936635799705983, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.007936635799705983, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.23007346987724303, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.815234375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.305187976360321, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02300734743475914, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02300734743475914, "step": 55 }, { "calibration/aurc": 0.36150441430089725, "calibration/batch_distribution_entropy": 0.9809884793671644, "calibration/batch_entropy_100bins": 0.9553038792513879, "calibration/batch_entropy_10bins": 0.9809884793671644, "calibration/batch_entropy_50bins": 0.9725050248395928, "calibration/batch_uniqueness": 0.9608878320253972, "calibration/buffer_distribution_entropy": 0.962701946306086, "calibration/buffer_entropy_100bins": 0.8976021302572151, "calibration/buffer_entropy_10bins": 0.962701946306086, "calibration/buffer_entropy_50bins": 0.931815885579519, "calibration/confidence_entropy": 0.4694115267660267, "calibration/coverage@0%": 0.003913894324853229, "calibration/coverage@1%": 0.003913894324853229, "calibration/coverage@10%": 0.003913894324853229, "calibration/coverage@15%": 0.003913894324853229, "calibration/coverage@20%": 0.06016848091976516, "calibration/coverage@25%": 0.1422547700587084, "calibration/coverage@30%": 0.36152687744618395, "calibration/coverage@5%": 0.003913894324853229, "calibration/ece": 0.13340543732164134, "calibration/mean_confidence": 0.5382121094846762, "calibration/prompt_uniqueness": 0.8838947974928459, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000390625, "completions/max_length": 1094.8, "completions/max_terminated_length": 448.8, "completions/mean_length": 155.36318359375, "completions/mean_terminated_length": 154.8236053466797, "completions/min_length": 60.6, "completions/min_terminated_length": 60.6, "epoch": 0.192, "grad_norm": 0.0012104158522561193, "learning_rate": 1e-06, "loss": -0.0, "num_tokens": 199532764.0, "reward": 0.9095940351486206, "reward_std": 0.12004156708717346, "rewards/accuracy_reward": 0.48525390625, "rewards/brier_reward": 0.7442206859588623, "rewards/confidence_uniqueness_reward": 0.9607667922973633, "rewards/format_reward": 0.99931640625, "rewards/frontier_aurc_reward": -0.003671001689508557, "rewards/frontier_coverage_0": 0.10046138539910317, "rewards/frontier_coverage_1": 0.10046138539910317, "rewards/frontier_coverage_10": 0.10046138539910317, "rewards/frontier_coverage_15": 0.10046138539910317, "rewards/frontier_coverage_20": 0.10046138539910317, "rewards/frontier_coverage_25": 0.10046138539910317, "rewards/frontier_coverage_5": 0.10046138539910317, "rewards/frontier_ece_reward": 0.02302660271525383, "rewards/frontier_entropy_batch_reward": -0.1423702985048294, "signal/accuracy_reward/centered_abs_mean": 0.138128662109375, "signal/accuracy_reward/group_bin_occupancy": 0.1890625, "signal/accuracy_reward/group_std_mean": 0.18108512461185455, "signal/accuracy_reward/group_zero_std_frac": 0.4875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0690643310546875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0690643310546875, "signal/advantage_abs_mean": 0.09277227818965912, "signal/advantage_pre_scale_abs_mean": 0.09277227818965912, "signal/advantage_pre_scale_std": 0.13744349181652069, "signal/advantage_std": 0.13744349181652069, "signal/brier_reward/centered_abs_mean": 0.19675520658493043, "signal/brier_reward/group_bin_occupancy": 0.88359375, "signal/brier_reward/group_std_mean": 0.24640358686447145, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019675521552562712, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.019675521552562712, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.014549448899924756, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.853515625, "signal/confidence_uniqueness_reward/group_std_mean": 0.021208246052265168, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0014549449319019915, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014549449319019915, "signal/format_reward/centered_abs_mean": 0.001324462890625, "signal/format_reward/group_bin_occupancy": 0.127734375, "signal/format_reward/group_std_mean": 0.0038669900968670845, "signal/format_reward/group_zero_std_frac": 0.978125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006622314453125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0006622314453125, "signal/frontier_aurc_reward/centered_abs_mean": 0.0026927752885967495, "signal/frontier_aurc_reward/group_bin_occupancy": 0.79921875, "signal/frontier_aurc_reward/group_std_mean": 0.0038467171136289833, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.3659690961940215e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.3659690961940215e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.22805612087249755, "signal/frontier_coverage_0/group_bin_occupancy": 0.8828125, "signal/frontier_coverage_0/group_std_mean": 0.29446661472320557, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0028507016133517025, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0028507016133517025, "signal/frontier_coverage_1/centered_abs_mean": 0.22805612087249755, "signal/frontier_coverage_1/group_bin_occupancy": 0.8828125, "signal/frontier_coverage_1/group_std_mean": 0.29446661472320557, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0028507016133517025, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0028507016133517025, "signal/frontier_coverage_10/centered_abs_mean": 0.22805612087249755, "signal/frontier_coverage_10/group_bin_occupancy": 0.8828125, "signal/frontier_coverage_10/group_std_mean": 0.29446661472320557, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0028507016133517025, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0028507016133517025, "signal/frontier_coverage_15/centered_abs_mean": 0.22805612087249755, "signal/frontier_coverage_15/group_bin_occupancy": 0.8828125, "signal/frontier_coverage_15/group_std_mean": 0.29446661472320557, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0028507016133517025, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0028507016133517025, "signal/frontier_coverage_20/centered_abs_mean": 0.22805612087249755, "signal/frontier_coverage_20/group_bin_occupancy": 0.8828125, "signal/frontier_coverage_20/group_std_mean": 0.29446661472320557, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0028507016133517025, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0028507016133517025, "signal/frontier_coverage_25/centered_abs_mean": 0.22805612087249755, "signal/frontier_coverage_25/group_bin_occupancy": 0.8828125, "signal/frontier_coverage_25/group_std_mean": 0.29446661472320557, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0028507016133517025, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0028507016133517025, "signal/frontier_coverage_5/centered_abs_mean": 0.22805612087249755, "signal/frontier_coverage_5/group_bin_occupancy": 0.8828125, "signal/frontier_coverage_5/group_std_mean": 0.29446661472320557, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0028507016133517025, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0028507016133517025, "signal/frontier_ece_reward/centered_abs_mean": 0.07045196145772933, "signal/frontier_ece_reward/group_bin_occupancy": 0.7609375, "signal/frontier_ece_reward/group_std_mean": 0.08983934074640273, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.007045195996761322, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.007045195996761322, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2271820455789566, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.756640625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3046163022518158, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02271820567548275, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02271820567548275, "step": 60 }, { "calibration/aurc": 0.30856681251726314, "calibration/batch_distribution_entropy": 0.976034904740984, "calibration/batch_entropy_100bins": 0.9546305905247134, "calibration/batch_entropy_10bins": 0.976034904740984, "calibration/batch_entropy_50bins": 0.9708003879239427, "calibration/batch_uniqueness": 0.9573702970661492, "calibration/buffer_distribution_entropy": 0.9666963516015349, "calibration/buffer_entropy_100bins": 0.9110968940643943, "calibration/buffer_entropy_10bins": 0.9666963516015349, "calibration/buffer_entropy_50bins": 0.9410400607285831, "calibration/confidence_entropy": 0.46580401448310244, "calibration/coverage@0%": 0.0070343077299412915, "calibration/coverage@1%": 0.0070343077299412915, "calibration/coverage@10%": 0.09375305772994129, "calibration/coverage@15%": 0.1668335677592955, "calibration/coverage@20%": 0.3592664505870842, "calibration/coverage@25%": 0.47539674045988256, "calibration/coverage@30%": 0.5375703277886498, "calibration/coverage@5%": 0.01797180772994129, "calibration/ece": 0.17415111158550767, "calibration/mean_confidence": 0.48247349070355083, "calibration/prompt_uniqueness": 0.8831559735952134, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 864.6, "completions/max_terminated_length": 429.2, "completions/mean_length": 167.91220703125, "completions/mean_terminated_length": 167.64508056640625, "completions/min_length": 64.6, "completions/min_terminated_length": 64.6, "epoch": 0.208, "grad_norm": 0.0012852454092353582, "learning_rate": 1e-06, "loss": 0.0009, "num_tokens": 216284409.0, "reward": 0.9297733783721924, "reward_std": 0.11357135176658631, "rewards/accuracy_reward": 0.521875, "rewards/brier_reward": 0.7487919449806213, "rewards/confidence_uniqueness_reward": 0.9588976621627807, "rewards/format_reward": 0.99951171875, "rewards/frontier_aurc_reward": -0.003094815369695425, "rewards/frontier_coverage_0": 0.08237677216529846, "rewards/frontier_coverage_1": 0.08237677216529846, "rewards/frontier_coverage_10": 0.08237677216529846, "rewards/frontier_coverage_15": 0.08237677216529846, "rewards/frontier_coverage_20": 0.08237677216529846, "rewards/frontier_coverage_25": 0.08237677216529846, "rewards/frontier_coverage_5": 0.08237677216529846, "rewards/frontier_ece_reward": 0.025132818147540092, "rewards/frontier_entropy_batch_reward": -0.11371518671512604, "signal/accuracy_reward/centered_abs_mean": 0.14034423828125, "signal/accuracy_reward/group_bin_occupancy": 0.18984375, "signal/accuracy_reward/group_std_mean": 0.18411757349967955, "signal/accuracy_reward/group_zero_std_frac": 0.48125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.070172119140625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.070172119140625, "signal/advantage_abs_mean": 0.08810736685991287, "signal/advantage_pre_scale_abs_mean": 0.08810736685991287, "signal/advantage_pre_scale_std": 0.1309487298130989, "signal/advantage_std": 0.1309487298130989, "signal/brier_reward/centered_abs_mean": 0.19533415138721466, "signal/brier_reward/group_bin_occupancy": 0.87734375, "signal/brier_reward/group_std_mean": 0.24378657042980195, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019533416256308556, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.019533416256308556, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.014175088331103324, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.910546875, "signal/confidence_uniqueness_reward/group_std_mean": 0.019649384170770647, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0014175089076161385, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014175089076161385, "signal/format_reward/centered_abs_mean": 0.000946044921875, "signal/format_reward/group_bin_occupancy": 0.126953125, "signal/format_reward/group_std_mean": 0.002762135770171881, "signal/format_reward/group_zero_std_frac": 0.984375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0022600206080824136, "signal/frontier_aurc_reward/group_bin_occupancy": 0.785546875, "signal/frontier_aurc_reward/group_std_mean": 0.0033125653862953186, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.8250257309991865e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.8250257309991865e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.25006471276283265, "signal/frontier_coverage_0/group_bin_occupancy": 0.894140625, "signal/frontier_coverage_0/group_std_mean": 0.3173247754573822, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003125808946788311, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003125808946788311, "signal/frontier_coverage_1/centered_abs_mean": 0.25006471276283265, "signal/frontier_coverage_1/group_bin_occupancy": 0.894140625, "signal/frontier_coverage_1/group_std_mean": 0.3173247754573822, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003125808946788311, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003125808946788311, "signal/frontier_coverage_10/centered_abs_mean": 0.25006471276283265, "signal/frontier_coverage_10/group_bin_occupancy": 0.894140625, "signal/frontier_coverage_10/group_std_mean": 0.3173247754573822, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003125808946788311, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003125808946788311, "signal/frontier_coverage_15/centered_abs_mean": 0.25006471276283265, "signal/frontier_coverage_15/group_bin_occupancy": 0.894140625, "signal/frontier_coverage_15/group_std_mean": 0.3173247754573822, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003125808946788311, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003125808946788311, "signal/frontier_coverage_20/centered_abs_mean": 0.25006471276283265, "signal/frontier_coverage_20/group_bin_occupancy": 0.894140625, "signal/frontier_coverage_20/group_std_mean": 0.3173247754573822, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003125808946788311, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003125808946788311, "signal/frontier_coverage_25/centered_abs_mean": 0.25006471276283265, "signal/frontier_coverage_25/group_bin_occupancy": 0.894140625, "signal/frontier_coverage_25/group_std_mean": 0.3173247754573822, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003125808946788311, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003125808946788311, "signal/frontier_coverage_5/centered_abs_mean": 0.25006471276283265, "signal/frontier_coverage_5/group_bin_occupancy": 0.894140625, "signal/frontier_coverage_5/group_std_mean": 0.3173247754573822, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003125808946788311, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003125808946788311, "signal/frontier_ece_reward/centered_abs_mean": 0.05986908376216889, "signal/frontier_ece_reward/group_bin_occupancy": 0.729296875, "signal/frontier_ece_reward/group_std_mean": 0.07776331305503845, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0059869085438549515, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0059869085438549515, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.1874374121427536, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.765234375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.25852798819541933, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.018743741139769555, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.018743741139769555, "step": 65 }, { "calibration/aurc": 0.330596438844872, "calibration/batch_distribution_entropy": 0.9800789256145354, "calibration/batch_entropy_100bins": 0.9652221422853536, "calibration/batch_entropy_10bins": 0.9800789256145354, "calibration/batch_entropy_50bins": 0.9783522166237942, "calibration/batch_uniqueness": 0.9553811352943624, "calibration/buffer_distribution_entropy": 0.9722985901730118, "calibration/buffer_entropy_100bins": 0.9226661190460972, "calibration/buffer_entropy_10bins": 0.9722985901730118, "calibration/buffer_entropy_50bins": 0.9495554922127216, "calibration/confidence_entropy": 0.479133780716967, "calibration/coverage@0%": 0.017207375244618395, "calibration/coverage@1%": 0.017207375244618395, "calibration/coverage@10%": 0.08995994373776908, "calibration/coverage@15%": 0.1708827666340509, "calibration/coverage@20%": 0.2517765410958904, "calibration/coverage@25%": 0.38699394569471623, "calibration/coverage@30%": 0.4792227250489237, "calibration/coverage@5%": 0.019947101272015655, "calibration/ece": 0.18975004661276867, "calibration/mean_confidence": 0.4268005781223603, "calibration/prompt_uniqueness": 0.879099580921566, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00048828125, "completions/max_length": 1016.2, "completions/max_terminated_length": 609.6, "completions/mean_length": 177.30166015625, "completions/mean_terminated_length": 176.6397247314453, "completions/min_length": 73.6, "completions/min_terminated_length": 73.6, "epoch": 0.224, "grad_norm": 0.0010515432804822922, "learning_rate": 1e-06, "loss": 0.0008, "num_tokens": 233253162.0, "reward": 0.9137272119522095, "reward_std": 0.10092450678348541, "rewards/accuracy_reward": 0.4833984375, "rewards/brier_reward": 0.7625031471252441, "rewards/confidence_uniqueness_reward": 0.9558073043823242, "rewards/format_reward": 0.9994140625, "rewards/frontier_aurc_reward": -0.0030704465694725513, "rewards/frontier_coverage_0": 0.12124817669391633, "rewards/frontier_coverage_1": 0.12124817669391633, "rewards/frontier_coverage_10": 0.12124817669391633, "rewards/frontier_coverage_15": 0.12124817669391633, "rewards/frontier_coverage_20": 0.12124817669391633, "rewards/frontier_coverage_25": 0.12124817669391633, "rewards/frontier_coverage_5": 0.12124817669391633, "rewards/frontier_ece_reward": 0.021660603955388068, "rewards/frontier_entropy_batch_reward": -0.12246965020895004, "signal/accuracy_reward/centered_abs_mean": 0.11944580078125, "signal/accuracy_reward/group_bin_occupancy": 0.18359375, "signal/accuracy_reward/group_std_mean": 0.15991852879524232, "signal/accuracy_reward/group_zero_std_frac": 0.53125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.059722900390625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.059722900390625, "signal/advantage_abs_mean": 0.07758828401565551, "signal/advantage_pre_scale_abs_mean": 0.07758828401565551, "signal/advantage_pre_scale_std": 0.11865905672311783, "signal/advantage_std": 0.11865905672311783, "signal/brier_reward/centered_abs_mean": 0.17773639261722565, "signal/brier_reward/group_bin_occupancy": 0.8609375, "signal/brier_reward/group_std_mean": 0.22495804727077484, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017773639410734177, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.017773639410734177, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.014823544770479202, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.925, "signal/confidence_uniqueness_reward/group_std_mean": 0.020474844425916672, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001482354523614049, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001482354523614049, "signal/format_reward/centered_abs_mean": 0.00113525390625, "signal/format_reward/group_bin_occupancy": 0.12734375, "signal/format_reward/group_std_mean": 0.0033145629800856113, "signal/format_reward/group_zero_std_frac": 0.98125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000567626953125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000567626953125, "signal/frontier_aurc_reward/centered_abs_mean": 0.0019347959896549582, "signal/frontier_aurc_reward/group_bin_occupancy": 0.779296875, "signal/frontier_aurc_reward/group_std_mean": 0.0028501675464212895, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.4184949143091217e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.4184949143091217e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.2396583765745163, "signal/frontier_coverage_0/group_bin_occupancy": 0.9046875, "signal/frontier_coverage_0/group_std_mean": 0.3042136013507843, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002995729772374034, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002995729772374034, "signal/frontier_coverage_1/centered_abs_mean": 0.2396583765745163, "signal/frontier_coverage_1/group_bin_occupancy": 0.9046875, "signal/frontier_coverage_1/group_std_mean": 0.3042136013507843, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002995729772374034, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002995729772374034, "signal/frontier_coverage_10/centered_abs_mean": 0.2396583765745163, "signal/frontier_coverage_10/group_bin_occupancy": 0.9046875, "signal/frontier_coverage_10/group_std_mean": 0.3042136013507843, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002995729772374034, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002995729772374034, "signal/frontier_coverage_15/centered_abs_mean": 0.2396583765745163, "signal/frontier_coverage_15/group_bin_occupancy": 0.9046875, "signal/frontier_coverage_15/group_std_mean": 0.3042136013507843, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002995729772374034, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002995729772374034, "signal/frontier_coverage_20/centered_abs_mean": 0.2396583765745163, "signal/frontier_coverage_20/group_bin_occupancy": 0.9046875, "signal/frontier_coverage_20/group_std_mean": 0.3042136013507843, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002995729772374034, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002995729772374034, "signal/frontier_coverage_25/centered_abs_mean": 0.2396583765745163, "signal/frontier_coverage_25/group_bin_occupancy": 0.9046875, "signal/frontier_coverage_25/group_std_mean": 0.3042136013507843, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002995729772374034, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002995729772374034, "signal/frontier_coverage_5/centered_abs_mean": 0.2396583765745163, "signal/frontier_coverage_5/group_bin_occupancy": 0.9046875, "signal/frontier_coverage_5/group_std_mean": 0.3042136013507843, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002995729772374034, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002995729772374034, "signal/frontier_ece_reward/centered_abs_mean": 0.048828136175870895, "signal/frontier_ece_reward/group_bin_occupancy": 0.698046875, "signal/frontier_ece_reward/group_std_mean": 0.06492637246847152, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004882813710719347, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004882813710719347, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.18858475387096404, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.76640625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.2542591840028763, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01885847598314285, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01885847598314285, "step": 70 }, { "calibration/aurc": 0.37536408814265015, "calibration/batch_distribution_entropy": 0.9771937196120367, "calibration/batch_entropy_100bins": 0.9607011820953872, "calibration/batch_entropy_10bins": 0.9771937196120367, "calibration/batch_entropy_50bins": 0.9735030092986381, "calibration/batch_uniqueness": 0.9562491780901577, "calibration/buffer_distribution_entropy": 0.9769076636131834, "calibration/buffer_entropy_100bins": 0.9320570034740199, "calibration/buffer_entropy_10bins": 0.9769076636131834, "calibration/buffer_entropy_50bins": 0.9563579445536927, "calibration/confidence_entropy": 0.5054295072946868, "calibration/coverage@0%": 0.011331947162426614, "calibration/coverage@1%": 0.011331947162426614, "calibration/coverage@10%": 0.07734757216242662, "calibration/coverage@15%": 0.1140663221624266, "calibration/coverage@20%": 0.1898475721624266, "calibration/coverage@25%": 0.2398475721624266, "calibration/coverage@30%": 0.2921913221624266, "calibration/coverage@5%": 0.04609757216242662, "calibration/ece": 0.1579841463548282, "calibration/mean_confidence": 0.48800210324158166, "calibration/prompt_uniqueness": 0.884576765332336, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00068359375, "completions/max_length": 1536.0, "completions/max_terminated_length": 705.4, "completions/mean_length": 184.08701171875, "completions/mean_terminated_length": 183.16248779296876, "completions/min_length": 73.4, "completions/min_terminated_length": 73.4, "epoch": 0.24, "grad_norm": 0.0011616898700594902, "learning_rate": 1e-06, "loss": 0.0023, "num_tokens": 250389893.0, "reward": 0.9384328126907349, "reward_std": 0.11288020461797714, "rewards/accuracy_reward": 0.5419921875, "rewards/brier_reward": 0.7529522061347962, "rewards/confidence_uniqueness_reward": 0.9568016529083252, "rewards/format_reward": 0.99921875, "rewards/frontier_aurc_reward": -0.002845893194898963, "rewards/frontier_coverage_0": 0.06113246735185385, "rewards/frontier_coverage_1": 0.06113246735185385, "rewards/frontier_coverage_10": 0.06113246735185385, "rewards/frontier_coverage_15": 0.06113246735185385, "rewards/frontier_coverage_20": 0.06113246735185385, "rewards/frontier_coverage_25": 0.06113246735185385, "rewards/frontier_coverage_5": 0.06113246735185385, "rewards/frontier_ece_reward": 0.022722626104950905, "rewards/frontier_entropy_batch_reward": -0.10733838081359863, "signal/accuracy_reward/centered_abs_mean": 0.14188232421875, "signal/accuracy_reward/group_bin_occupancy": 0.191796875, "signal/accuracy_reward/group_std_mean": 0.18681408166885377, "signal/accuracy_reward/group_zero_std_frac": 0.465625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.070941162109375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.070941162109375, "signal/advantage_abs_mean": 0.08786697685718536, "signal/advantage_pre_scale_abs_mean": 0.08786697685718536, "signal/advantage_pre_scale_std": 0.13164357095956802, "signal/advantage_std": 0.13164357095956802, "signal/brier_reward/centered_abs_mean": 0.18108824789524078, "signal/brier_reward/group_bin_occupancy": 0.88046875, "signal/brier_reward/group_std_mean": 0.22850363552570344, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018108825013041497, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.018108825013041497, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.01398250348865986, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.937109375, "signal/confidence_uniqueness_reward/group_std_mean": 0.019881158694624902, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013982503674924373, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013982503674924373, "signal/format_reward/centered_abs_mean": 0.001513671875, "signal/format_reward/group_bin_occupancy": 0.128125, "signal/format_reward/group_std_mean": 0.004419417306780815, "signal/format_reward/group_zero_std_frac": 0.975, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0007568359375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0007568359375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0021438012598082425, "signal/frontier_aurc_reward/group_bin_occupancy": 0.7859375, "signal/frontier_aurc_reward/group_std_mean": 0.0031446309760212897, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.679751632967964e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.679751632967964e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.23010546565055848, "signal/frontier_coverage_0/group_bin_occupancy": 0.90546875, "signal/frontier_coverage_0/group_std_mean": 0.29437238574028013, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0028763184323906898, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0028763184323906898, "signal/frontier_coverage_1/centered_abs_mean": 0.23010546565055848, "signal/frontier_coverage_1/group_bin_occupancy": 0.90546875, "signal/frontier_coverage_1/group_std_mean": 0.29437238574028013, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0028763184323906898, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0028763184323906898, "signal/frontier_coverage_10/centered_abs_mean": 0.23010546565055848, "signal/frontier_coverage_10/group_bin_occupancy": 0.90546875, "signal/frontier_coverage_10/group_std_mean": 0.29437238574028013, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0028763184323906898, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0028763184323906898, "signal/frontier_coverage_15/centered_abs_mean": 0.23010546565055848, "signal/frontier_coverage_15/group_bin_occupancy": 0.90546875, "signal/frontier_coverage_15/group_std_mean": 0.29437238574028013, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0028763184323906898, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0028763184323906898, "signal/frontier_coverage_20/centered_abs_mean": 0.23010546565055848, "signal/frontier_coverage_20/group_bin_occupancy": 0.90546875, "signal/frontier_coverage_20/group_std_mean": 0.29437238574028013, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0028763184323906898, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0028763184323906898, "signal/frontier_coverage_25/centered_abs_mean": 0.23010546565055848, "signal/frontier_coverage_25/group_bin_occupancy": 0.90546875, "signal/frontier_coverage_25/group_std_mean": 0.29437238574028013, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0028763184323906898, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0028763184323906898, "signal/frontier_coverage_5/centered_abs_mean": 0.23010546565055848, "signal/frontier_coverage_5/group_bin_occupancy": 0.90546875, "signal/frontier_coverage_5/group_std_mean": 0.29437238574028013, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0028763184323906898, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0028763184323906898, "signal/frontier_ece_reward/centered_abs_mean": 0.04993258342146874, "signal/frontier_ece_reward/group_bin_occupancy": 0.71328125, "signal/frontier_ece_reward/group_std_mean": 0.0665904238820076, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004993258509784937, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004993258509784937, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.17947104573249817, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7984375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.24450061917304994, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.017947105318307878, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.017947105318307878, "step": 75 }, { "calibration/aurc": 0.3088122502636872, "calibration/batch_distribution_entropy": 0.9862230260996881, "calibration/batch_entropy_100bins": 0.9645962804983934, "calibration/batch_entropy_10bins": 0.9862230260996881, "calibration/batch_entropy_50bins": 0.979222583210366, "calibration/batch_uniqueness": 0.9582794189453125, "calibration/buffer_distribution_entropy": 0.9794717446767558, "calibration/buffer_entropy_100bins": 0.9393690519837218, "calibration/buffer_entropy_10bins": 0.9794717446767558, "calibration/buffer_entropy_50bins": 0.961310714439325, "calibration/confidence_entropy": 0.48274468276614363, "calibration/coverage@0%": 0.000390625, "calibration/coverage@1%": 0.000390625, "calibration/coverage@10%": 0.066015625, "calibration/coverage@15%": 0.230859375, "calibration/coverage@20%": 0.31796875, "calibration/coverage@25%": 0.46640625, "calibration/coverage@30%": 0.582421875, "calibration/coverage@5%": 0.000390625, "calibration/ece": 0.12116486342122168, "calibration/mean_confidence": 0.512004733953818, "calibration/prompt_uniqueness": 0.882861328125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00078125, "completions/max_length": 1173.8, "completions/max_terminated_length": 689.0, "completions/mean_length": 182.08017578125, "completions/mean_terminated_length": 181.02371826171876, "completions/min_length": 80.4, "completions/min_terminated_length": 80.4, "epoch": 0.256, "grad_norm": 0.0011187122436240315, "learning_rate": 1e-06, "loss": 0.0013, "num_tokens": 267309210.0, "reward": 0.9286181807518006, "reward_std": 0.1064249649643898, "rewards/accuracy_reward": 0.513671875, "rewards/brier_reward": 0.7663417935371399, "rewards/confidence_uniqueness_reward": 0.9569314360618592, "rewards/format_reward": 0.99912109375, "rewards/frontier_aurc_reward": -0.0030292498413473368, "rewards/frontier_coverage_0": 0.09447629451751709, "rewards/frontier_coverage_1": 0.09447629451751709, "rewards/frontier_coverage_10": 0.09447629451751709, "rewards/frontier_coverage_15": 0.09447629451751709, "rewards/frontier_coverage_20": 0.09447629451751709, "rewards/frontier_coverage_25": 0.09447629451751709, "rewards/frontier_coverage_5": 0.09447629451751709, "rewards/frontier_ece_reward": 0.02433442622423172, "rewards/frontier_entropy_batch_reward": -0.10767877101898193, "signal/accuracy_reward/centered_abs_mean": 0.13363037109375, "signal/accuracy_reward/group_bin_occupancy": 0.18359375, "signal/accuracy_reward/group_std_mean": 0.17056742310523987, "signal/accuracy_reward/group_zero_std_frac": 0.53125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.066815185546875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.066815185546875, "signal/advantage_abs_mean": 0.08388981521129608, "signal/advantage_pre_scale_abs_mean": 0.08388981521129608, "signal/advantage_pre_scale_std": 0.12852715700864792, "signal/advantage_std": 0.12852715700864792, "signal/brier_reward/centered_abs_mean": 0.1720704823732376, "signal/brier_reward/group_bin_occupancy": 0.863671875, "signal/brier_reward/group_std_mean": 0.21704732179641723, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01720704808831215, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01720704808831215, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.013470648415386676, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.943359375, "signal/confidence_uniqueness_reward/group_std_mean": 0.018156062439084054, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013470648787915706, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013470648787915706, "signal/format_reward/centered_abs_mean": 0.001580810546875, "signal/format_reward/group_bin_occupancy": 0.126953125, "signal/format_reward/group_std_mean": 0.0033625275362282993, "signal/format_reward/group_zero_std_frac": 0.984375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0007904052734375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0007904052734375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0024374906904995443, "signal/frontier_aurc_reward/group_bin_occupancy": 0.770703125, "signal/frontier_aurc_reward/group_std_mean": 0.0035979004576802255, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.0468634213320912e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.0468634213320912e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.2125555694103241, "signal/frontier_coverage_0/group_bin_occupancy": 0.881640625, "signal/frontier_coverage_0/group_std_mean": 0.2719772934913635, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0026569446548819543, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0026569446548819543, "signal/frontier_coverage_1/centered_abs_mean": 0.2125555694103241, "signal/frontier_coverage_1/group_bin_occupancy": 0.881640625, "signal/frontier_coverage_1/group_std_mean": 0.2719772934913635, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026569446548819543, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026569446548819543, "signal/frontier_coverage_10/centered_abs_mean": 0.2125555694103241, "signal/frontier_coverage_10/group_bin_occupancy": 0.881640625, "signal/frontier_coverage_10/group_std_mean": 0.2719772934913635, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026569446548819543, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026569446548819543, "signal/frontier_coverage_15/centered_abs_mean": 0.2125555694103241, "signal/frontier_coverage_15/group_bin_occupancy": 0.881640625, "signal/frontier_coverage_15/group_std_mean": 0.2719772934913635, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0026569446548819543, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026569446548819543, "signal/frontier_coverage_20/centered_abs_mean": 0.2125555694103241, "signal/frontier_coverage_20/group_bin_occupancy": 0.881640625, "signal/frontier_coverage_20/group_std_mean": 0.2719772934913635, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0026569446548819543, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0026569446548819543, "signal/frontier_coverage_25/centered_abs_mean": 0.2125555694103241, "signal/frontier_coverage_25/group_bin_occupancy": 0.881640625, "signal/frontier_coverage_25/group_std_mean": 0.2719772934913635, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0026569446548819543, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0026569446548819543, "signal/frontier_coverage_5/centered_abs_mean": 0.2125555694103241, "signal/frontier_coverage_5/group_bin_occupancy": 0.881640625, "signal/frontier_coverage_5/group_std_mean": 0.2719772934913635, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026569446548819543, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026569446548819543, "signal/frontier_ece_reward/centered_abs_mean": 0.050062181800603865, "signal/frontier_ece_reward/group_bin_occupancy": 0.685546875, "signal/frontier_ece_reward/group_std_mean": 0.06556581407785415, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0050062181428074835, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0050062181428074835, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.175579434633255, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.78125, "signal/frontier_entropy_batch_reward/group_std_mean": 0.23965271115303038, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.017557943984866143, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.017557943984866143, "step": 80 }, { "calibration/aurc": 0.3872883248462567, "calibration/batch_distribution_entropy": 0.9911252411531031, "calibration/batch_entropy_100bins": 0.9668056025762068, "calibration/batch_entropy_10bins": 0.9911252411531031, "calibration/batch_entropy_50bins": 0.9829774945880942, "calibration/batch_uniqueness": 0.9590049221415742, "calibration/buffer_distribution_entropy": 0.9814864706391244, "calibration/buffer_entropy_100bins": 0.945556175624465, "calibration/buffer_entropy_10bins": 0.9814864706391244, "calibration/buffer_entropy_50bins": 0.9654573043868198, "calibration/confidence_entropy": 0.4982663637951156, "calibration/coverage@0%": 0.001171875, "calibration/coverage@1%": 0.001171875, "calibration/coverage@10%": 0.042723651960784315, "calibration/coverage@15%": 0.11563265931372549, "calibration/coverage@20%": 0.16890624999999998, "calibration/coverage@25%": 0.2233057598039216, "calibration/coverage@30%": 0.32767310049019605, "calibration/coverage@5%": 0.001171875, "calibration/ece": 0.15179049772902994, "calibration/mean_confidence": 0.5230270996410378, "calibration/prompt_uniqueness": 0.8800985243055555, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000390625, "completions/max_length": 1133.8, "completions/max_terminated_length": 661.6, "completions/mean_length": 187.561328125, "completions/mean_terminated_length": 187.0335693359375, "completions/min_length": 82.2, "completions/min_terminated_length": 82.2, "epoch": 0.272, "grad_norm": 0.0012320392997935414, "learning_rate": 1e-06, "loss": 0.0014, "num_tokens": 284195534.0, "reward": 0.9237002849578857, "reward_std": 0.10775048434734344, "rewards/accuracy_reward": 0.5041015625, "rewards/brier_reward": 0.7605077266693115, "rewards/confidence_uniqueness_reward": 0.9581493377685547, "rewards/format_reward": 0.99931640625, "rewards/frontier_aurc_reward": -0.0031376248225569725, "rewards/frontier_coverage_0": 0.09240868501365185, "rewards/frontier_coverage_1": 0.09240868501365185, "rewards/frontier_coverage_10": 0.09240868501365185, "rewards/frontier_coverage_15": 0.09240868501365185, "rewards/frontier_coverage_20": 0.09240868501365185, "rewards/frontier_coverage_25": 0.09240868501365185, "rewards/frontier_coverage_5": 0.09240868501365185, "rewards/frontier_ece_reward": 0.022019311785697937, "rewards/frontier_entropy_batch_reward": -0.10122893005609512, "signal/accuracy_reward/centered_abs_mean": 0.13126220703125, "signal/accuracy_reward/group_bin_occupancy": 0.180859375, "signal/accuracy_reward/group_std_mean": 0.16641454994678498, "signal/accuracy_reward/group_zero_std_frac": 0.553125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.065631103515625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.065631103515625, "signal/advantage_abs_mean": 0.08459821194410325, "signal/advantage_pre_scale_abs_mean": 0.08459821194410325, "signal/advantage_pre_scale_std": 0.13007204085588456, "signal/advantage_std": 0.13007204085588456, "signal/brier_reward/centered_abs_mean": 0.16944461762905122, "signal/brier_reward/group_bin_occupancy": 0.869140625, "signal/brier_reward/group_std_mean": 0.2140252709388733, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016944462060928346, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.016944462060928346, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012623942643404006, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.930078125, "signal/confidence_uniqueness_reward/group_std_mean": 0.017593150585889818, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012623942689970135, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012623942689970135, "signal/format_reward/centered_abs_mean": 0.001312255859375, "signal/format_reward/group_bin_occupancy": 0.12734375, "signal/format_reward/group_std_mean": 0.0035306816920638085, "signal/format_reward/group_zero_std_frac": 0.98125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006561279296875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0006561279296875, "signal/frontier_aurc_reward/centered_abs_mean": 0.002525777369737625, "signal/frontier_aurc_reward/group_bin_occupancy": 0.78125, "signal/frontier_aurc_reward/group_std_mean": 0.0036961573641747236, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.1572217631037346e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.1572217631037346e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.20187339186668396, "signal/frontier_coverage_0/group_bin_occupancy": 0.893359375, "signal/frontier_coverage_0/group_std_mean": 0.2607047349214554, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002523417491465807, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002523417491465807, "signal/frontier_coverage_1/centered_abs_mean": 0.20187339186668396, "signal/frontier_coverage_1/group_bin_occupancy": 0.893359375, "signal/frontier_coverage_1/group_std_mean": 0.2607047349214554, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002523417491465807, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002523417491465807, "signal/frontier_coverage_10/centered_abs_mean": 0.20187339186668396, "signal/frontier_coverage_10/group_bin_occupancy": 0.893359375, "signal/frontier_coverage_10/group_std_mean": 0.2607047349214554, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002523417491465807, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002523417491465807, "signal/frontier_coverage_15/centered_abs_mean": 0.20187339186668396, "signal/frontier_coverage_15/group_bin_occupancy": 0.893359375, "signal/frontier_coverage_15/group_std_mean": 0.2607047349214554, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002523417491465807, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002523417491465807, "signal/frontier_coverage_20/centered_abs_mean": 0.20187339186668396, "signal/frontier_coverage_20/group_bin_occupancy": 0.893359375, "signal/frontier_coverage_20/group_std_mean": 0.2607047349214554, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002523417491465807, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002523417491465807, "signal/frontier_coverage_25/centered_abs_mean": 0.20187339186668396, "signal/frontier_coverage_25/group_bin_occupancy": 0.893359375, "signal/frontier_coverage_25/group_std_mean": 0.2607047349214554, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002523417491465807, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002523417491465807, "signal/frontier_coverage_5/centered_abs_mean": 0.20187339186668396, "signal/frontier_coverage_5/group_bin_occupancy": 0.893359375, "signal/frontier_coverage_5/group_std_mean": 0.2607047349214554, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002523417491465807, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002523417491465807, "signal/frontier_ece_reward/centered_abs_mean": 0.04795216247439384, "signal/frontier_ece_reward/group_bin_occupancy": 0.662109375, "signal/frontier_ece_reward/group_std_mean": 0.06269470900297165, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004795216396450997, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004795216396450997, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.16949324011802674, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.756640625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.23497817516326905, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01694932412356138, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01694932412356138, "step": 85 }, { "calibration/aurc": 0.34581066310791553, "calibration/batch_distribution_entropy": 0.9819802009108018, "calibration/batch_entropy_100bins": 0.9615195052648016, "calibration/batch_entropy_10bins": 0.9819802009108018, "calibration/batch_entropy_50bins": 0.9762919627953279, "calibration/batch_uniqueness": 0.958841547683193, "calibration/buffer_distribution_entropy": 0.9829452406186754, "calibration/buffer_entropy_100bins": 0.9506370434085273, "calibration/buffer_entropy_10bins": 0.9829452406186754, "calibration/buffer_entropy_50bins": 0.9689288935812674, "calibration/confidence_entropy": 0.4889890321665257, "calibration/coverage@0%": 0.00390625, "calibration/coverage@1%": 0.00390625, "calibration/coverage@10%": 0.03678296232876712, "calibration/coverage@15%": 0.0778780883072407, "calibration/coverage@20%": 0.15996131971624267, "calibration/coverage@25%": 0.20687912793542074, "calibration/coverage@30%": 0.25455525318003913, "calibration/coverage@5%": 0.012908206947162427, "calibration/ece": 0.12166334605382478, "calibration/mean_confidence": 0.5602370063816919, "calibration/prompt_uniqueness": 0.88345570003577, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 867.0, "completions/max_terminated_length": 444.6, "completions/mean_length": 183.744921875, "completions/mean_terminated_length": 183.48092651367188, "completions/min_length": 83.0, "completions/min_terminated_length": 83.0, "epoch": 0.288, "grad_norm": 0.001005663420073688, "learning_rate": 1e-06, "loss": 0.0009, "num_tokens": 301035258.0, "reward": 0.9296231985092163, "reward_std": 0.10486756712198257, "rewards/accuracy_reward": 0.51689453125, "rewards/brier_reward": 0.7634823560714722, "rewards/confidence_uniqueness_reward": 0.957963502407074, "rewards/format_reward": 0.999609375, "rewards/frontier_aurc_reward": -0.0030765291303396225, "rewards/frontier_coverage_0": 0.09391801804304123, "rewards/frontier_coverage_1": 0.09391801804304123, "rewards/frontier_coverage_10": 0.09391801804304123, "rewards/frontier_coverage_15": 0.09391801804304123, "rewards/frontier_coverage_20": 0.09391801804304123, "rewards/frontier_coverage_25": 0.09391801804304123, "rewards/frontier_coverage_5": 0.09391801804304123, "rewards/frontier_ece_reward": 0.022438769787549974, "rewards/frontier_entropy_batch_reward": -0.11196594089269638, "signal/accuracy_reward/centered_abs_mean": 0.128155517578125, "signal/accuracy_reward/group_bin_occupancy": 0.185546875, "signal/accuracy_reward/group_std_mean": 0.16995208263397216, "signal/accuracy_reward/group_zero_std_frac": 0.515625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0640777587890625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0640777587890625, "signal/advantage_abs_mean": 0.08041936606168747, "signal/advantage_pre_scale_abs_mean": 0.08041936606168747, "signal/advantage_pre_scale_std": 0.1254291296005249, "signal/advantage_std": 0.1254291296005249, "signal/brier_reward/centered_abs_mean": 0.1697759747505188, "signal/brier_reward/group_bin_occupancy": 0.859765625, "signal/brier_reward/group_std_mean": 0.21578840911388397, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016977597773075104, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.016977597773075104, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012654472142457962, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.916015625, "signal/confidence_uniqueness_reward/group_std_mean": 0.01737392246723175, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012654472608119248, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012654472608119248, "signal/format_reward/centered_abs_mean": 0.0007568359375, "signal/format_reward/group_bin_occupancy": 0.1265625, "signal/format_reward/group_std_mean": 0.0022097086533904076, "signal/format_reward/group_zero_std_frac": 0.9875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00037841796875, "signal/frontier_aurc_reward/centered_abs_mean": 0.002495748782530427, "signal/frontier_aurc_reward/group_bin_occupancy": 0.775390625, "signal/frontier_aurc_reward/group_std_mean": 0.0036962830927222967, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.119685970887076e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.119685970887076e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.2049511432647705, "signal/frontier_coverage_0/group_bin_occupancy": 0.87109375, "signal/frontier_coverage_0/group_std_mean": 0.265233251452446, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00256188940256834, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00256188940256834, "signal/frontier_coverage_1/centered_abs_mean": 0.2049511432647705, "signal/frontier_coverage_1/group_bin_occupancy": 0.87109375, "signal/frontier_coverage_1/group_std_mean": 0.265233251452446, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00256188940256834, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00256188940256834, "signal/frontier_coverage_10/centered_abs_mean": 0.2049511432647705, "signal/frontier_coverage_10/group_bin_occupancy": 0.87109375, "signal/frontier_coverage_10/group_std_mean": 0.265233251452446, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00256188940256834, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00256188940256834, "signal/frontier_coverage_15/centered_abs_mean": 0.2049511432647705, "signal/frontier_coverage_15/group_bin_occupancy": 0.87109375, "signal/frontier_coverage_15/group_std_mean": 0.265233251452446, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00256188940256834, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00256188940256834, "signal/frontier_coverage_20/centered_abs_mean": 0.2049511432647705, "signal/frontier_coverage_20/group_bin_occupancy": 0.87109375, "signal/frontier_coverage_20/group_std_mean": 0.265233251452446, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00256188940256834, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00256188940256834, "signal/frontier_coverage_25/centered_abs_mean": 0.2049511432647705, "signal/frontier_coverage_25/group_bin_occupancy": 0.87109375, "signal/frontier_coverage_25/group_std_mean": 0.265233251452446, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00256188940256834, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00256188940256834, "signal/frontier_coverage_5/centered_abs_mean": 0.2049511432647705, "signal/frontier_coverage_5/group_bin_occupancy": 0.87109375, "signal/frontier_coverage_5/group_std_mean": 0.265233251452446, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00256188940256834, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00256188940256834, "signal/frontier_ece_reward/centered_abs_mean": 0.04639850929379463, "signal/frontier_ece_reward/group_bin_occupancy": 0.655859375, "signal/frontier_ece_reward/group_std_mean": 0.06019414514303208, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004639850929379463, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004639850929379463, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.18500931262969972, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.766015625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.2503536373376846, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.018500932306051255, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.018500932306051255, "step": 90 }, { "calibration/aurc": 0.30831422750676263, "calibration/batch_distribution_entropy": 0.9839200839606193, "calibration/batch_entropy_100bins": 0.9640602725866346, "calibration/batch_entropy_10bins": 0.9839200839606193, "calibration/batch_entropy_50bins": 0.9787751606670845, "calibration/batch_uniqueness": 0.957394812760023, "calibration/buffer_distribution_entropy": 0.9838445722781197, "calibration/buffer_entropy_100bins": 0.9547516645514659, "calibration/buffer_entropy_10bins": 0.9838445722781197, "calibration/buffer_entropy_50bins": 0.9715224456315298, "calibration/confidence_entropy": 0.4862570490637088, "calibration/coverage@0%": 0.016037793542074363, "calibration/coverage@1%": 0.016037793542074363, "calibration/coverage@10%": 0.06649798189823874, "calibration/coverage@15%": 0.14706152152641877, "calibration/coverage@20%": 0.27959806139921717, "calibration/coverage@25%": 0.36794046599804303, "calibration/coverage@30%": 0.46173709637964777, "calibration/coverage@5%": 0.025431139921722114, "calibration/ece": 0.12583506352110047, "calibration/mean_confidence": 0.5307190906026357, "calibration/prompt_uniqueness": 0.8810963514568158, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000390625, "completions/max_length": 1214.6, "completions/max_terminated_length": 601.2, "completions/mean_length": 192.29521484375, "completions/mean_terminated_length": 191.76925048828124, "completions/min_length": 85.2, "completions/min_terminated_length": 85.2, "epoch": 0.304, "grad_norm": 0.0009384758886881173, "learning_rate": 1e-06, "loss": 0.001, "num_tokens": 317934313.0, "reward": 0.9246511697769165, "reward_std": 0.10153087228536606, "rewards/accuracy_reward": 0.50927734375, "rewards/brier_reward": 0.7537349700927735, "rewards/confidence_uniqueness_reward": 0.9565217852592468, "rewards/format_reward": 0.9994140625, "rewards/frontier_aurc_reward": -0.002995410794392228, "rewards/frontier_coverage_0": 0.09300088435411454, "rewards/frontier_coverage_1": 0.09300088435411454, "rewards/frontier_coverage_10": 0.09300088435411454, "rewards/frontier_coverage_15": 0.09300088435411454, "rewards/frontier_coverage_20": 0.09300088435411454, "rewards/frontier_coverage_25": 0.09300088435411454, "rewards/frontier_coverage_5": 0.09300088435411454, "rewards/frontier_ece_reward": 0.0181322168558836, "rewards/frontier_entropy_batch_reward": -0.10633568167686462, "signal/accuracy_reward/centered_abs_mean": 0.124017333984375, "signal/accuracy_reward/group_bin_occupancy": 0.186328125, "signal/accuracy_reward/group_std_mean": 0.1662678450345993, "signal/accuracy_reward/group_zero_std_frac": 0.509375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0620086669921875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0620086669921875, "signal/advantage_abs_mean": 0.07806494235992431, "signal/advantage_pre_scale_abs_mean": 0.07806494235992431, "signal/advantage_pre_scale_std": 0.12007036358118058, "signal/advantage_std": 0.12007036358118058, "signal/brier_reward/centered_abs_mean": 0.17296849191188812, "signal/brier_reward/group_bin_occupancy": 0.864453125, "signal/brier_reward/group_std_mean": 0.21920109391212464, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017296848818659782, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.017296848818659782, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012902907282114028, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.941015625, "signal/confidence_uniqueness_reward/group_std_mean": 0.01767069585621357, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00129029075615108, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00129029075615108, "signal/format_reward/centered_abs_mean": 0.001123046875, "signal/format_reward/group_bin_occupancy": 0.126953125, "signal/format_reward/group_std_mean": 0.0029782544821500777, "signal/format_reward/group_zero_std_frac": 0.984375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0005615234375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0005615234375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0022347769234329464, "signal/frontier_aurc_reward/group_bin_occupancy": 0.77109375, "signal/frontier_aurc_reward/group_std_mean": 0.003333268640562892, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.7934712124988437e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.7934712124988437e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.22140364944934846, "signal/frontier_coverage_0/group_bin_occupancy": 0.88671875, "signal/frontier_coverage_0/group_std_mean": 0.2837002158164978, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002767545636743307, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002767545636743307, "signal/frontier_coverage_1/centered_abs_mean": 0.22140364944934846, "signal/frontier_coverage_1/group_bin_occupancy": 0.88671875, "signal/frontier_coverage_1/group_std_mean": 0.2837002158164978, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002767545636743307, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002767545636743307, "signal/frontier_coverage_10/centered_abs_mean": 0.22140364944934846, "signal/frontier_coverage_10/group_bin_occupancy": 0.88671875, "signal/frontier_coverage_10/group_std_mean": 0.2837002158164978, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002767545636743307, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002767545636743307, "signal/frontier_coverage_15/centered_abs_mean": 0.22140364944934846, "signal/frontier_coverage_15/group_bin_occupancy": 0.88671875, "signal/frontier_coverage_15/group_std_mean": 0.2837002158164978, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002767545636743307, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002767545636743307, "signal/frontier_coverage_20/centered_abs_mean": 0.22140364944934846, "signal/frontier_coverage_20/group_bin_occupancy": 0.88671875, "signal/frontier_coverage_20/group_std_mean": 0.2837002158164978, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002767545636743307, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002767545636743307, "signal/frontier_coverage_25/centered_abs_mean": 0.22140364944934846, "signal/frontier_coverage_25/group_bin_occupancy": 0.88671875, "signal/frontier_coverage_25/group_std_mean": 0.2837002158164978, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002767545636743307, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002767545636743307, "signal/frontier_coverage_5/centered_abs_mean": 0.22140364944934846, "signal/frontier_coverage_5/group_bin_occupancy": 0.88671875, "signal/frontier_coverage_5/group_std_mean": 0.2837002158164978, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002767545636743307, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002767545636743307, "signal/frontier_ece_reward/centered_abs_mean": 0.041382260620594025, "signal/frontier_ece_reward/group_bin_occupancy": 0.633984375, "signal/frontier_ece_reward/group_std_mean": 0.05412525683641434, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004138226062059403, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004138226062059403, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.1742929309606552, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.769921875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.23640851378440858, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.017429293505847453, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.017429293505847453, "step": 95 }, { "calibration/aurc": 0.2659389940843186, "calibration/batch_distribution_entropy": 0.9876112772320713, "calibration/batch_entropy_100bins": 0.9641565709786863, "calibration/batch_entropy_10bins": 0.9876112772320713, "calibration/batch_entropy_50bins": 0.9764263295564872, "calibration/batch_uniqueness": 0.9552329008986323, "calibration/buffer_distribution_entropy": 0.9848758337567493, "calibration/buffer_entropy_100bins": 0.9586118774229, "calibration/buffer_entropy_10bins": 0.9848758337567493, "calibration/buffer_entropy_50bins": 0.9739535676958562, "calibration/confidence_entropy": 0.48793486664465907, "calibration/coverage@0%": 0.015629586594911937, "calibration/coverage@1%": 0.015629586594911937, "calibration/coverage@10%": 0.1556193431996086, "calibration/coverage@15%": 0.35648009417808224, "calibration/coverage@20%": 0.4373753975048924, "calibration/coverage@25%": 0.5413389799412915, "calibration/coverage@30%": 0.646870413405088, "calibration/coverage@5%": 0.052348336594911934, "calibration/ece": 0.14271041517983887, "calibration/mean_confidence": 0.5320063723249747, "calibration/prompt_uniqueness": 0.8741963937304892, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 931.0, "completions/max_terminated_length": 538.0, "completions/mean_length": 195.34619140625, "completions/mean_terminated_length": 195.08486938476562, "completions/min_length": 81.6, "completions/min_terminated_length": 81.6, "epoch": 0.32, "grad_norm": 0.0009181920322589576, "learning_rate": 1e-06, "loss": 0.0011, "num_tokens": 335023362.0, "reward": 0.9346740245819092, "reward_std": 0.08925552219152451, "rewards/accuracy_reward": 0.52470703125, "rewards/brier_reward": 0.7718238949775695, "rewards/confidence_uniqueness_reward": 0.9552286267280579, "rewards/format_reward": 0.99951171875, "rewards/frontier_aurc_reward": -0.0027100421022623776, "rewards/frontier_coverage_0": 0.09961767829954624, "rewards/frontier_coverage_1": 0.09961767829954624, "rewards/frontier_coverage_10": 0.09961767829954624, "rewards/frontier_coverage_15": 0.09961767829954624, "rewards/frontier_coverage_20": 0.09961767829954624, "rewards/frontier_coverage_25": 0.09961767829954624, "rewards/frontier_coverage_5": 0.09961767829954624, "rewards/frontier_ece_reward": 0.022007835283875465, "rewards/frontier_entropy_batch_reward": -0.11024036258459091, "signal/accuracy_reward/centered_abs_mean": 0.096905517578125, "signal/accuracy_reward/group_bin_occupancy": 0.17265625, "signal/accuracy_reward/group_std_mean": 0.1309303015470505, "signal/accuracy_reward/group_zero_std_frac": 0.61875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0484527587890625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0484527587890625, "signal/advantage_abs_mean": 0.06786017194390297, "signal/advantage_pre_scale_abs_mean": 0.06786017194390297, "signal/advantage_pre_scale_std": 0.10885472595691681, "signal/advantage_std": 0.10885472595691681, "signal/brier_reward/centered_abs_mean": 0.15993238091468812, "signal/brier_reward/group_bin_occupancy": 0.8578125, "signal/brier_reward/group_std_mean": 0.20314022600650788, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015993238613009452, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.015993238613009452, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.013150414079427719, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.93515625, "signal/confidence_uniqueness_reward/group_std_mean": 0.018019300326704978, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001315041445195675, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001315041445195675, "signal/format_reward/centered_abs_mean": 0.000946044921875, "signal/format_reward/group_bin_occupancy": 0.126953125, "signal/format_reward/group_std_mean": 0.0027621358167380095, "signal/format_reward/group_zero_std_frac": 0.984375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0021082177059724926, "signal/frontier_aurc_reward/group_bin_occupancy": 0.777734375, "signal/frontier_aurc_reward/group_std_mean": 0.003096911637112498, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.6352722488809378e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.6352722488809378e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.1996733397245407, "signal/frontier_coverage_0/group_bin_occupancy": 0.873046875, "signal/frontier_coverage_0/group_std_mean": 0.25663221478462217, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00249591669999063, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00249591669999063, "signal/frontier_coverage_1/centered_abs_mean": 0.1996733397245407, "signal/frontier_coverage_1/group_bin_occupancy": 0.873046875, "signal/frontier_coverage_1/group_std_mean": 0.25663221478462217, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00249591669999063, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00249591669999063, "signal/frontier_coverage_10/centered_abs_mean": 0.1996733397245407, "signal/frontier_coverage_10/group_bin_occupancy": 0.873046875, "signal/frontier_coverage_10/group_std_mean": 0.25663221478462217, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00249591669999063, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00249591669999063, "signal/frontier_coverage_15/centered_abs_mean": 0.1996733397245407, "signal/frontier_coverage_15/group_bin_occupancy": 0.873046875, "signal/frontier_coverage_15/group_std_mean": 0.25663221478462217, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00249591669999063, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00249591669999063, "signal/frontier_coverage_20/centered_abs_mean": 0.1996733397245407, "signal/frontier_coverage_20/group_bin_occupancy": 0.873046875, "signal/frontier_coverage_20/group_std_mean": 0.25663221478462217, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00249591669999063, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00249591669999063, "signal/frontier_coverage_25/centered_abs_mean": 0.1996733397245407, "signal/frontier_coverage_25/group_bin_occupancy": 0.873046875, "signal/frontier_coverage_25/group_std_mean": 0.25663221478462217, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00249591669999063, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00249591669999063, "signal/frontier_coverage_5/centered_abs_mean": 0.1996733397245407, "signal/frontier_coverage_5/group_bin_occupancy": 0.873046875, "signal/frontier_coverage_5/group_std_mean": 0.25663221478462217, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00249591669999063, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00249591669999063, "signal/frontier_ece_reward/centered_abs_mean": 0.039423568546772, "signal/frontier_ece_reward/group_bin_occupancy": 0.628515625, "signal/frontier_ece_reward/group_std_mean": 0.05080499574542045, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.003942356910556555, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.003942356910556555, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.18164745569229127, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.77109375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.24795118868350982, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.018164745718240737, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.018164745718240737, "step": 100 }, { "epoch": 0.32, "eval_calibration/aurc": 0.5011022568208787, "eval_calibration/batch_distribution_entropy": 0.9195527723724205, "eval_calibration/batch_entropy_100bins": 0.7037637102944632, "eval_calibration/batch_entropy_10bins": 0.9195527723724205, "eval_calibration/batch_entropy_50bins": 0.779983490066886, "eval_calibration/batch_uniqueness": 0.8955078125, "eval_calibration/buffer_distribution_entropy": 0.9854447883057991, "eval_calibration/buffer_entropy_100bins": 0.9607979279881629, "eval_calibration/buffer_entropy_10bins": 0.9854447883057991, "eval_calibration/buffer_entropy_50bins": 0.9752806536193837, "eval_calibration/confidence_entropy": 0.46313100587975253, "eval_calibration/coverage@0%": 0.046875, "eval_calibration/coverage@1%": 0.046875, "eval_calibration/coverage@10%": 0.046875, "eval_calibration/coverage@15%": 0.046875, "eval_calibration/coverage@20%": 0.046875, "eval_calibration/coverage@25%": 0.1015625, "eval_calibration/coverage@30%": 0.1015625, "eval_calibration/coverage@5%": 0.046875, "eval_calibration/ece": 0.21962422290224884, "eval_calibration/mean_confidence": 0.4459440781139095, "eval_calibration/prompt_uniqueness": 0.8955078125, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 598.0, "eval_completions/max_terminated_length": 598.0, "eval_completions/mean_length": 205.20069885253906, "eval_completions/mean_terminated_length": 205.20069885253906, "eval_completions/min_length": 97.75, "eval_completions/min_terminated_length": 97.75, "eval_loss": 0.0, "eval_num_tokens": 335023362.0, "eval_reward": 0.823313757777214, "eval_reward_std": 0.230881467461586, "eval_rewards/accuracy_reward": 0.404296875, "eval_rewards/brier_reward": 0.7818417847156525, "eval_rewards/confidence_uniqueness_reward": 0.90087890625, "eval_rewards/format_reward": 1.0, "eval_rewards/frontier_aurc_reward": -0.0032422358635812998, "eval_rewards/frontier_coverage_0": 0.19046474620699883, "eval_rewards/frontier_coverage_1": 0.19046474620699883, "eval_rewards/frontier_coverage_10": 0.19046474620699883, "eval_rewards/frontier_coverage_15": 0.19046474620699883, "eval_rewards/frontier_coverage_20": 0.19046474620699883, "eval_rewards/frontier_coverage_25": 0.19046474620699883, "eval_rewards/frontier_coverage_5": 0.19046474620699883, "eval_rewards/frontier_ece_reward": 0.015543812420219183, "eval_rewards/frontier_entropy_batch_reward": -0.652862548828125, "eval_runtime": 27.5363, "eval_samples_per_second": 18.158, "eval_signal/accuracy_reward/centered_abs_mean": 0.4705810546875, "eval_signal/accuracy_reward/group_bin_occupancy": 0.25, "eval_signal/accuracy_reward/group_std_mean": 0.4924849271774292, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23529052734375, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23529052734375, "eval_signal/advantage_abs_mean": 0.21217802911996841, "eval_signal/advantage_pre_scale_abs_mean": 0.21217802911996841, "eval_signal/advantage_pre_scale_std": 0.2284625768661499, "eval_signal/advantage_std": 0.2284625768661499, "eval_signal/brier_reward/centered_abs_mean": 0.19572831690311432, "eval_signal/brier_reward/group_bin_occupancy": 0.8984375, "eval_signal/brier_reward/group_std_mean": 0.2521805912256241, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019572831690311432, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.019572831690311432, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.037384033203125, "eval_signal/confidence_uniqueness_reward/group_bin_occupancy": 0.3828125, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.043207885697484016, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003738403378520161, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003738403378520161, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_bin_occupancy": 0.125, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.003047365229576826, "eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.765625, "eval_signal/frontier_aurc_reward/group_std_mean": 0.004978827317245305, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.8092065551609267e-05, "eval_signal/frontier_aurc_reward/weight": 0.012500000186264515, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.8092065551609267e-05, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.3660961836576462, "eval_signal/frontier_coverage_0/group_bin_occupancy": 0.9609375, "eval_signal/frontier_coverage_0/group_std_mean": 0.4516659453511238, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.004576202598400414, "eval_signal/frontier_coverage_0/weight": 0.012500000186264515, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004576202598400414, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.3660961836576462, "eval_signal/frontier_coverage_1/group_bin_occupancy": 0.9609375, "eval_signal/frontier_coverage_1/group_std_mean": 0.4516659453511238, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004576202598400414, "eval_signal/frontier_coverage_1/weight": 0.012500000186264515, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004576202598400414, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.3660961836576462, "eval_signal/frontier_coverage_10/group_bin_occupancy": 0.9609375, "eval_signal/frontier_coverage_10/group_std_mean": 0.4516659453511238, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004576202598400414, "eval_signal/frontier_coverage_10/weight": 0.012500000186264515, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004576202598400414, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.3660961836576462, "eval_signal/frontier_coverage_15/group_bin_occupancy": 0.9609375, "eval_signal/frontier_coverage_15/group_std_mean": 0.4516659453511238, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004576202598400414, "eval_signal/frontier_coverage_15/weight": 0.012500000186264515, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004576202598400414, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.3660961836576462, "eval_signal/frontier_coverage_20/group_bin_occupancy": 0.9609375, "eval_signal/frontier_coverage_20/group_std_mean": 0.4516659453511238, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004576202598400414, "eval_signal/frontier_coverage_20/weight": 0.012500000186264515, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004576202598400414, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.3660961836576462, "eval_signal/frontier_coverage_25/group_bin_occupancy": 0.9609375, "eval_signal/frontier_coverage_25/group_std_mean": 0.4516659453511238, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004576202598400414, "eval_signal/frontier_coverage_25/weight": 0.012500000186264515, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004576202598400414, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.3660961836576462, "eval_signal/frontier_coverage_5/group_bin_occupancy": 0.9609375, "eval_signal/frontier_coverage_5/group_std_mean": 0.4516659453511238, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004576202598400414, "eval_signal/frontier_coverage_5/weight": 0.012500000186264515, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004576202598400414, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.037898930720984936, "eval_signal/frontier_ece_reward/group_bin_occupancy": 0.7265625, "eval_signal/frontier_ece_reward/group_std_mean": 0.06024767179042101, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.003789893235079944, "eval_signal/frontier_ece_reward/weight": 0.10000000149011612, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.003789893235079944, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3284454345703125, "eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.25, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.34034357219934464, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03284454345703125, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03284454345703125, "eval_steps_per_second": 0.145, "step": 100 }, { "calibration/aurc": 0.3048095368177367, "calibration/batch_distribution_entropy": 0.9827100885636348, "calibration/batch_entropy_100bins": 0.9700066493969526, "calibration/batch_entropy_10bins": 0.9827100885636348, "calibration/batch_entropy_50bins": 0.9797538783318143, "calibration/batch_uniqueness": 0.9539394591967032, "calibration/buffer_distribution_entropy": 0.9874702323709037, "calibration/buffer_entropy_100bins": 0.9646559344286517, "calibration/buffer_entropy_10bins": 0.9874702323709037, "calibration/buffer_entropy_50bins": 0.9780032413599857, "calibration/confidence_entropy": 0.49428211357645263, "calibration/coverage@0%": 0.003515625, "calibration/coverage@1%": 0.003515625, "calibration/coverage@10%": 0.010945144324853228, "calibration/coverage@15%": 0.06413817881604697, "calibration/coverage@20%": 0.13452941536203522, "calibration/coverage@25%": 0.37636833414872795, "calibration/coverage@30%": 0.5475178877201565, "calibration/coverage@5%": 0.003515625, "calibration/ece": 0.13704558129813732, "calibration/mean_confidence": 0.4775055551271814, "calibration/prompt_uniqueness": 0.8719490805476067, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0005859375, "completions/max_length": 1152.0, "completions/max_terminated_length": 562.4, "completions/mean_length": 200.309765625, "completions/mean_terminated_length": 199.526708984375, "completions/min_length": 87.0, "completions/min_terminated_length": 87.0, "epoch": 0.336, "grad_norm": 0.0009267533314414322, "learning_rate": 1e-06, "loss": 0.0026, "num_tokens": 351796966.0, "reward": 0.9373140454292297, "reward_std": 0.09476064741611481, "rewards/accuracy_reward": 0.53466796875, "rewards/brier_reward": 0.7662975668907166, "rewards/confidence_uniqueness_reward": 0.9530789375305175, "rewards/format_reward": 0.99921875, "rewards/frontier_aurc_reward": -0.002560078864917159, "rewards/frontier_coverage_0": 0.08570035807788372, "rewards/frontier_coverage_1": 0.08570035807788372, "rewards/frontier_coverage_10": 0.08570035807788372, "rewards/frontier_coverage_15": 0.08570035807788372, "rewards/frontier_coverage_20": 0.08570035807788372, "rewards/frontier_coverage_25": 0.08570035807788372, "rewards/frontier_coverage_5": 0.08570035807788372, "rewards/frontier_ece_reward": 0.019346101209521294, "rewards/frontier_entropy_batch_reward": -0.1096835508942604, "signal/accuracy_reward/centered_abs_mean": 0.114910888671875, "signal/accuracy_reward/group_bin_occupancy": 0.180078125, "signal/accuracy_reward/group_std_mean": 0.15273061096668245, "signal/accuracy_reward/group_zero_std_frac": 0.559375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0574554443359375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0574554443359375, "signal/advantage_abs_mean": 0.07300383895635605, "signal/advantage_pre_scale_abs_mean": 0.07300383895635605, "signal/advantage_pre_scale_std": 0.11499524712562562, "signal/advantage_std": 0.11499524712562562, "signal/brier_reward/centered_abs_mean": 0.16318108439445494, "signal/brier_reward/group_bin_occupancy": 0.8609375, "signal/brier_reward/group_std_mean": 0.20483859181404113, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01631810814142227, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01631810814142227, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.013554375991225243, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.929296875, "signal/confidence_uniqueness_reward/group_std_mean": 0.01912703476846218, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013554376550018788, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013554376550018788, "signal/format_reward/centered_abs_mean": 0.00150146484375, "signal/format_reward/group_bin_occupancy": 0.127734375, "signal/format_reward/group_std_mean": 0.004083108808845282, "signal/format_reward/group_zero_std_frac": 0.978125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000750732421875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000750732421875, "signal/frontier_aurc_reward/centered_abs_mean": 0.0020001448690891268, "signal/frontier_aurc_reward/group_bin_occupancy": 0.75625, "signal/frontier_aurc_reward/group_std_mean": 0.003017709869891405, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.5001811809488573e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.5001811809488573e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.21160376965999603, "signal/frontier_coverage_0/group_bin_occupancy": 0.887109375, "signal/frontier_coverage_0/group_std_mean": 0.26891090869903567, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00264504705555737, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00264504705555737, "signal/frontier_coverage_1/centered_abs_mean": 0.21160376965999603, "signal/frontier_coverage_1/group_bin_occupancy": 0.887109375, "signal/frontier_coverage_1/group_std_mean": 0.26891090869903567, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00264504705555737, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00264504705555737, "signal/frontier_coverage_10/centered_abs_mean": 0.21160376965999603, "signal/frontier_coverage_10/group_bin_occupancy": 0.887109375, "signal/frontier_coverage_10/group_std_mean": 0.26891090869903567, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00264504705555737, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00264504705555737, "signal/frontier_coverage_15/centered_abs_mean": 0.21160376965999603, "signal/frontier_coverage_15/group_bin_occupancy": 0.887109375, "signal/frontier_coverage_15/group_std_mean": 0.26891090869903567, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00264504705555737, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00264504705555737, "signal/frontier_coverage_20/centered_abs_mean": 0.21160376965999603, "signal/frontier_coverage_20/group_bin_occupancy": 0.887109375, "signal/frontier_coverage_20/group_std_mean": 0.26891090869903567, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00264504705555737, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00264504705555737, "signal/frontier_coverage_25/centered_abs_mean": 0.21160376965999603, "signal/frontier_coverage_25/group_bin_occupancy": 0.887109375, "signal/frontier_coverage_25/group_std_mean": 0.26891090869903567, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00264504705555737, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00264504705555737, "signal/frontier_coverage_5/centered_abs_mean": 0.21160376965999603, "signal/frontier_coverage_5/group_bin_occupancy": 0.887109375, "signal/frontier_coverage_5/group_std_mean": 0.26891090869903567, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00264504705555737, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00264504705555737, "signal/frontier_ece_reward/centered_abs_mean": 0.034326578676700595, "signal/frontier_ece_reward/group_bin_occupancy": 0.629296875, "signal/frontier_ece_reward/group_std_mean": 0.045123565942049026, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.003432658081874251, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.003432658081874251, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.1763071745634079, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.760546875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.23879291415214537, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01763071771711111, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01763071771711111, "step": 105 }, { "calibration/aurc": 0.3209504238614419, "calibration/batch_distribution_entropy": 0.9667923872892714, "calibration/batch_entropy_100bins": 0.9595862106455335, "calibration/batch_entropy_10bins": 0.9667923872892714, "calibration/batch_entropy_50bins": 0.9686096089602719, "calibration/batch_uniqueness": 0.9500500726518638, "calibration/buffer_distribution_entropy": 0.9931473031803522, "calibration/buffer_entropy_100bins": 0.9763724503789746, "calibration/buffer_entropy_10bins": 0.9931473031803522, "calibration/buffer_entropy_50bins": 0.9862473564066343, "calibration/confidence_entropy": 0.46706245514587097, "calibration/coverage@0%": 0.025416648680019954, "calibration/coverage@1%": 0.025416648680019954, "calibration/coverage@10%": 0.13932512302866354, "calibration/coverage@15%": 0.24454261456294848, "calibration/coverage@20%": 0.33253733428302834, "calibration/coverage@25%": 0.43029770598307815, "calibration/coverage@30%": 0.5132089376846629, "calibration/coverage@5%": 0.03520984720943172, "calibration/ece": 0.12471999058892232, "calibration/mean_confidence": 0.44264518386490936, "calibration/prompt_uniqueness": 0.8607651026762486, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000390625, "completions/max_length": 977.8, "completions/max_terminated_length": 576.2, "completions/mean_length": 201.4564453125, "completions/mean_terminated_length": 200.93560180664062, "completions/min_length": 84.2, "completions/min_terminated_length": 84.2, "epoch": 0.352, "grad_norm": 0.0009329073945991695, "learning_rate": 1e-06, "loss": 0.0013, "num_tokens": 369120296.0, "reward": 0.908801531791687, "reward_std": 0.09598542004823685, "rewards/accuracy_reward": 0.47099609375, "rewards/brier_reward": 0.7735196352005005, "rewards/confidence_uniqueness_reward": 0.9524645686149598, "rewards/format_reward": 0.99931640625, "rewards/frontier_aurc_reward": -0.003027368104085326, "rewards/frontier_coverage_0": 0.13684964329004287, "rewards/frontier_coverage_1": 0.13684964329004287, "rewards/frontier_coverage_10": 0.13684964329004287, "rewards/frontier_coverage_15": 0.13684964329004287, "rewards/frontier_coverage_20": 0.13684964329004287, "rewards/frontier_coverage_25": 0.13684964329004287, "rewards/frontier_coverage_5": 0.13684964329004287, "rewards/frontier_ece_reward": 0.015464337170124054, "rewards/frontier_entropy_batch_reward": -0.12436114549636841, "signal/accuracy_reward/centered_abs_mean": 0.117779541015625, "signal/accuracy_reward/group_bin_occupancy": 0.18046875, "signal/accuracy_reward/group_std_mean": 0.15502216517925263, "signal/accuracy_reward/group_zero_std_frac": 0.55625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0588897705078125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0588897705078125, "signal/advantage_abs_mean": 0.0737321838736534, "signal/advantage_pre_scale_abs_mean": 0.0737321838736534, "signal/advantage_pre_scale_std": 0.11658578664064408, "signal/advantage_std": 0.11658578664064408, "signal/brier_reward/centered_abs_mean": 0.158657768368721, "signal/brier_reward/group_bin_occupancy": 0.85703125, "signal/brier_reward/group_std_mean": 0.20199429094791413, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01586577631533146, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01586577631533146, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.013269886001944542, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.933984375, "signal/confidence_uniqueness_reward/group_std_mean": 0.018744326569139956, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001326988684013486, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001326988684013486, "signal/format_reward/centered_abs_mean": 0.001324462890625, "signal/format_reward/group_bin_occupancy": 0.127734375, "signal/format_reward/group_std_mean": 0.003866990143433213, "signal/format_reward/group_zero_std_frac": 0.978125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006622314453125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0006622314453125, "signal/frontier_aurc_reward/centered_abs_mean": 0.0023643441498279573, "signal/frontier_aurc_reward/group_bin_occupancy": 0.752734375, "signal/frontier_aurc_reward/group_std_mean": 0.0036352206021547317, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.9554302818723956e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.9554302818723956e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.21218776404857637, "signal/frontier_coverage_0/group_bin_occupancy": 0.883984375, "signal/frontier_coverage_0/group_std_mean": 0.268929660320282, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002652347134426236, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002652347134426236, "signal/frontier_coverage_1/centered_abs_mean": 0.21218776404857637, "signal/frontier_coverage_1/group_bin_occupancy": 0.883984375, "signal/frontier_coverage_1/group_std_mean": 0.268929660320282, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002652347134426236, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002652347134426236, "signal/frontier_coverage_10/centered_abs_mean": 0.21218776404857637, "signal/frontier_coverage_10/group_bin_occupancy": 0.883984375, "signal/frontier_coverage_10/group_std_mean": 0.268929660320282, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002652347134426236, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002652347134426236, "signal/frontier_coverage_15/centered_abs_mean": 0.21218776404857637, "signal/frontier_coverage_15/group_bin_occupancy": 0.883984375, "signal/frontier_coverage_15/group_std_mean": 0.268929660320282, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002652347134426236, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002652347134426236, "signal/frontier_coverage_20/centered_abs_mean": 0.21218776404857637, "signal/frontier_coverage_20/group_bin_occupancy": 0.883984375, "signal/frontier_coverage_20/group_std_mean": 0.268929660320282, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002652347134426236, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002652347134426236, "signal/frontier_coverage_25/centered_abs_mean": 0.21218776404857637, "signal/frontier_coverage_25/group_bin_occupancy": 0.883984375, "signal/frontier_coverage_25/group_std_mean": 0.268929660320282, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002652347134426236, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002652347134426236, "signal/frontier_coverage_5/centered_abs_mean": 0.21218776404857637, "signal/frontier_coverage_5/group_bin_occupancy": 0.883984375, "signal/frontier_coverage_5/group_std_mean": 0.268929660320282, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002652347134426236, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002652347134426236, "signal/frontier_ece_reward/centered_abs_mean": 0.02997433766722679, "signal/frontier_ece_reward/group_bin_occupancy": 0.63515625, "signal/frontier_ece_reward/group_std_mean": 0.03954430893063545, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0029974338132888077, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0029974338132888077, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.1805424988269806, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.762109375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.24203293323516845, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01805424988269806, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01805424988269806, "step": 110 }, { "calibration/aurc": 0.3790243716711346, "calibration/batch_distribution_entropy": 0.982709811496667, "calibration/batch_entropy_100bins": 0.9717901920414483, "calibration/batch_entropy_10bins": 0.982709811496667, "calibration/batch_entropy_50bins": 0.980121350130229, "calibration/batch_uniqueness": 0.954217529296875, "calibration/buffer_distribution_entropy": 0.9971913463073884, "calibration/buffer_entropy_100bins": 0.9857669408440536, "calibration/buffer_entropy_10bins": 0.9971913463073884, "calibration/buffer_entropy_50bins": 0.9926144523426681, "calibration/confidence_entropy": 0.4853503677467438, "calibration/coverage@0%": 0.003125, "calibration/coverage@1%": 0.003125, "calibration/coverage@10%": 0.024609375, "calibration/coverage@15%": 0.046875, "calibration/coverage@20%": 0.2328125, "calibration/coverage@25%": 0.32734375, "calibration/coverage@30%": 0.419921875, "calibration/coverage@5%": 0.003125, "calibration/ece": 0.1385865561118635, "calibration/mean_confidence": 0.5192355450283547, "calibration/prompt_uniqueness": 0.870849609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0005859375, "completions/max_length": 1157.8, "completions/max_terminated_length": 715.4, "completions/mean_length": 198.96796875, "completions/mean_terminated_length": 198.1828582763672, "completions/min_length": 91.0, "completions/min_terminated_length": 91.0, "epoch": 0.368, "grad_norm": 0.0008175044204108417, "learning_rate": 1e-06, "loss": 0.001, "num_tokens": 386223200.0, "reward": 0.9230035901069641, "reward_std": 0.08821378499269486, "rewards/accuracy_reward": 0.49833984375, "rewards/brier_reward": 0.7744688391685486, "rewards/confidence_uniqueness_reward": 0.9531052112579346, "rewards/format_reward": 0.9994140625, "rewards/frontier_aurc_reward": -0.0032609821762889625, "rewards/frontier_coverage_0": 0.11651851236820221, "rewards/frontier_coverage_1": 0.11651851236820221, "rewards/frontier_coverage_10": 0.11651851236820221, "rewards/frontier_coverage_15": 0.11651851236820221, "rewards/frontier_coverage_20": 0.11651851236820221, "rewards/frontier_coverage_25": 0.11651851236820221, "rewards/frontier_coverage_5": 0.11651851236820221, "rewards/frontier_ece_reward": 0.014127342030405998, "rewards/frontier_entropy_batch_reward": -0.10198113471269607, "signal/accuracy_reward/centered_abs_mean": 0.100238037109375, "signal/accuracy_reward/group_bin_occupancy": 0.175390625, "signal/accuracy_reward/group_std_mean": 0.13502895534038545, "signal/accuracy_reward/group_zero_std_frac": 0.596875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0501190185546875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0501190185546875, "signal/advantage_abs_mean": 0.06722283586859704, "signal/advantage_pre_scale_abs_mean": 0.06722283586859704, "signal/advantage_pre_scale_std": 0.11006049364805222, "signal/advantage_std": 0.11006049364805222, "signal/brier_reward/centered_abs_mean": 0.1503296136856079, "signal/brier_reward/group_bin_occupancy": 0.85, "signal/brier_reward/group_std_mean": 0.19333814978599548, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015032961405813694, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.015032961405813694, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012458873353898526, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.953125, "signal/confidence_uniqueness_reward/group_std_mean": 0.016741343960165977, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001245887391269207, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001245887391269207, "signal/format_reward/centered_abs_mean": 0.0010986328125, "signal/format_reward/group_bin_occupancy": 0.1265625, "signal/format_reward/group_std_mean": 0.0025827332865446806, "signal/format_reward/group_zero_std_frac": 0.9875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00054931640625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00054931640625, "signal/frontier_aurc_reward/centered_abs_mean": 0.0028565511573106052, "signal/frontier_aurc_reward/group_bin_occupancy": 0.74765625, "signal/frontier_aurc_reward/group_std_mean": 0.004412284214049577, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.570688932086341e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.570688932086341e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.1842558950185776, "signal/frontier_coverage_0/group_bin_occupancy": 0.87578125, "signal/frontier_coverage_0/group_std_mean": 0.23739778399467468, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0023031987249851225, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023031987249851225, "signal/frontier_coverage_1/centered_abs_mean": 0.1842558950185776, "signal/frontier_coverage_1/group_bin_occupancy": 0.87578125, "signal/frontier_coverage_1/group_std_mean": 0.23739778399467468, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023031987249851225, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023031987249851225, "signal/frontier_coverage_10/centered_abs_mean": 0.1842558950185776, "signal/frontier_coverage_10/group_bin_occupancy": 0.87578125, "signal/frontier_coverage_10/group_std_mean": 0.23739778399467468, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023031987249851225, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023031987249851225, "signal/frontier_coverage_15/centered_abs_mean": 0.1842558950185776, "signal/frontier_coverage_15/group_bin_occupancy": 0.87578125, "signal/frontier_coverage_15/group_std_mean": 0.23739778399467468, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023031987249851225, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023031987249851225, "signal/frontier_coverage_20/centered_abs_mean": 0.1842558950185776, "signal/frontier_coverage_20/group_bin_occupancy": 0.87578125, "signal/frontier_coverage_20/group_std_mean": 0.23739778399467468, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0023031987249851225, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0023031987249851225, "signal/frontier_coverage_25/centered_abs_mean": 0.1842558950185776, "signal/frontier_coverage_25/group_bin_occupancy": 0.87578125, "signal/frontier_coverage_25/group_std_mean": 0.23739778399467468, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0023031987249851225, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0023031987249851225, "signal/frontier_coverage_5/centered_abs_mean": 0.1842558950185776, "signal/frontier_coverage_5/group_bin_occupancy": 0.87578125, "signal/frontier_coverage_5/group_std_mean": 0.23739778399467468, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023031987249851225, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023031987249851225, "signal/frontier_ece_reward/centered_abs_mean": 0.02585429698228836, "signal/frontier_ece_reward/group_bin_occupancy": 0.67109375, "signal/frontier_ece_reward/group_std_mean": 0.03366940915584564, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0025854297447949646, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0025854297447949646, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.1619349092245102, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.804296875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.21937708854675292, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.016193491220474244, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.016193491220474244, "step": 115 }, { "calibration/aurc": 0.3183561455024736, "calibration/batch_distribution_entropy": 0.9761403104003881, "calibration/batch_entropy_100bins": 0.9665729483264448, "calibration/batch_entropy_10bins": 0.9761403104003881, "calibration/batch_entropy_50bins": 0.9747901306945629, "calibration/batch_uniqueness": 0.9519656303020192, "calibration/buffer_distribution_entropy": 0.9990661585840115, "calibration/buffer_entropy_100bins": 0.9913789451496673, "calibration/buffer_entropy_10bins": 0.9990661585840115, "calibration/buffer_entropy_50bins": 0.9961616782652717, "calibration/confidence_entropy": 0.4738653115445815, "calibration/coverage@0%": 0.01641236545988258, "calibration/coverage@1%": 0.01641236545988258, "calibration/coverage@10%": 0.22210815190802347, "calibration/coverage@15%": 0.278392551369863, "calibration/coverage@20%": 0.31787396037182, "calibration/coverage@25%": 0.3725928021037182, "calibration/coverage@30%": 0.43044046599804303, "calibration/coverage@5%": 0.09390594422700586, "calibration/ece": 0.14317070799732728, "calibration/mean_confidence": 0.4663198053679688, "calibration/prompt_uniqueness": 0.8607313074488381, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00068359375, "completions/max_length": 1210.2, "completions/max_terminated_length": 720.4, "completions/mean_length": 199.19677734375, "completions/mean_terminated_length": 198.28481750488282, "completions/min_length": 85.0, "completions/min_terminated_length": 85.0, "epoch": 0.384, "grad_norm": 0.0007297234842553735, "learning_rate": 1e-06, "loss": 0.0017, "num_tokens": 403119487.0, "reward": 0.9370161175727845, "reward_std": 0.09072954654693603, "rewards/accuracy_reward": 0.52900390625, "rewards/brier_reward": 0.7911636233329773, "rewards/confidence_uniqueness_reward": 0.9525709033012391, "rewards/format_reward": 0.99921875, "rewards/frontier_aurc_reward": -0.002813473041169345, "rewards/frontier_coverage_0": 0.11049409657716751, "rewards/frontier_coverage_1": 0.11049409657716751, "rewards/frontier_coverage_10": 0.11049409657716751, "rewards/frontier_coverage_15": 0.11049409657716751, "rewards/frontier_coverage_20": 0.11049409657716751, "rewards/frontier_coverage_25": 0.11049409657716751, "rewards/frontier_coverage_5": 0.11049409657716751, "rewards/frontier_ece_reward": 0.013869478553533553, "rewards/frontier_entropy_batch_reward": -0.12488683462142944, "signal/accuracy_reward/centered_abs_mean": 0.104974365234375, "signal/accuracy_reward/group_bin_occupancy": 0.17734375, "signal/accuracy_reward/group_std_mean": 0.14191071838140487, "signal/accuracy_reward/group_zero_std_frac": 0.58125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0524871826171875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0524871826171875, "signal/advantage_abs_mean": 0.06853913813829422, "signal/advantage_pre_scale_abs_mean": 0.06853913813829422, "signal/advantage_pre_scale_std": 0.11065925359725952, "signal/advantage_std": 0.11065925359725952, "signal/brier_reward/centered_abs_mean": 0.14036066234111785, "signal/brier_reward/group_bin_occupancy": 0.849609375, "signal/brier_reward/group_std_mean": 0.18114359974861144, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014036066457629203, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014036066457629203, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.013375256024301053, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.95078125, "signal/confidence_uniqueness_reward/group_std_mean": 0.018456452712416648, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013375255977734924, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013375255977734924, "signal/format_reward/centered_abs_mean": 0.0014892578125, "signal/format_reward/group_bin_occupancy": 0.12734375, "signal/format_reward/group_std_mean": 0.0037468004506081344, "signal/format_reward/group_zero_std_frac": 0.98125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00074462890625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00074462890625, "signal/frontier_aurc_reward/centered_abs_mean": 0.0028811234049499037, "signal/frontier_aurc_reward/group_bin_occupancy": 0.734765625, "signal/frontier_aurc_reward/group_std_mean": 0.004553025308996439, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.6014043507748285e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.6014043507748285e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.18208499848842621, "signal/frontier_coverage_0/group_bin_occupancy": 0.87421875, "signal/frontier_coverage_0/group_std_mean": 0.2352500468492508, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0022760625462979077, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022760625462979077, "signal/frontier_coverage_1/centered_abs_mean": 0.18208499848842621, "signal/frontier_coverage_1/group_bin_occupancy": 0.87421875, "signal/frontier_coverage_1/group_std_mean": 0.2352500468492508, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022760625462979077, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022760625462979077, "signal/frontier_coverage_10/centered_abs_mean": 0.18208499848842621, "signal/frontier_coverage_10/group_bin_occupancy": 0.87421875, "signal/frontier_coverage_10/group_std_mean": 0.2352500468492508, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022760625462979077, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022760625462979077, "signal/frontier_coverage_15/centered_abs_mean": 0.18208499848842621, "signal/frontier_coverage_15/group_bin_occupancy": 0.87421875, "signal/frontier_coverage_15/group_std_mean": 0.2352500468492508, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0022760625462979077, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022760625462979077, "signal/frontier_coverage_20/centered_abs_mean": 0.18208499848842621, "signal/frontier_coverage_20/group_bin_occupancy": 0.87421875, "signal/frontier_coverage_20/group_std_mean": 0.2352500468492508, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0022760625462979077, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0022760625462979077, "signal/frontier_coverage_25/centered_abs_mean": 0.18208499848842621, "signal/frontier_coverage_25/group_bin_occupancy": 0.87421875, "signal/frontier_coverage_25/group_std_mean": 0.2352500468492508, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0022760625462979077, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022760625462979077, "signal/frontier_coverage_5/centered_abs_mean": 0.18208499848842621, "signal/frontier_coverage_5/group_bin_occupancy": 0.87421875, "signal/frontier_coverage_5/group_std_mean": 0.2352500468492508, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0022760625462979077, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022760625462979077, "signal/frontier_ece_reward/centered_abs_mean": 0.02035584971308708, "signal/frontier_ece_reward/group_bin_occupancy": 0.685546875, "signal/frontier_ece_reward/group_std_mean": 0.026301588490605356, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0020355849992483854, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0020355849992483854, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.1847107857465744, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.79296875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.24139588475227355, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0184710793197155, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0184710793197155, "step": 120 }, { "calibration/aurc": 0.4323062871824819, "calibration/batch_distribution_entropy": 0.9845003654379166, "calibration/batch_entropy_100bins": 0.9709600544841221, "calibration/batch_entropy_10bins": 0.9845003654379166, "calibration/batch_entropy_50bins": 0.9813723090983913, "calibration/batch_uniqueness": 0.9541264325075541, "calibration/buffer_distribution_entropy": 0.9991648742985866, "calibration/buffer_entropy_100bins": 0.9930824204994557, "calibration/buffer_entropy_10bins": 0.9991648742985866, "calibration/buffer_entropy_50bins": 0.9971470754608687, "calibration/confidence_entropy": 0.49521664288064526, "calibration/coverage@0%": 0.001171875, "calibration/coverage@1%": 0.001171875, "calibration/coverage@10%": 0.001171875, "calibration/coverage@15%": 0.001171875, "calibration/coverage@20%": 0.013671875, "calibration/coverage@25%": 0.058984375, "calibration/coverage@30%": 0.15415134803921568, "calibration/coverage@5%": 0.001171875, "calibration/ece": 0.15455243215267175, "calibration/mean_confidence": 0.5068435462847125, "calibration/prompt_uniqueness": 0.8713734809027777, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00048828125, "completions/max_length": 1140.8, "completions/max_terminated_length": 579.8, "completions/mean_length": 196.72412109375, "completions/mean_terminated_length": 196.0713348388672, "completions/min_length": 91.8, "completions/min_terminated_length": 91.8, "epoch": 0.4, "grad_norm": 0.0010661915875971317, "learning_rate": 1e-06, "loss": 0.0006, "num_tokens": 420170390.0, "reward": 0.9237029790878296, "reward_std": 0.10037501901388168, "rewards/accuracy_reward": 0.51015625, "rewards/brier_reward": 0.7678788423538208, "rewards/confidence_uniqueness_reward": 0.9519475817680358, "rewards/format_reward": 0.99921875, "rewards/frontier_aurc_reward": -0.0038750086445361377, "rewards/frontier_coverage_0": 0.10178494267165661, "rewards/frontier_coverage_1": 0.10178494267165661, "rewards/frontier_coverage_10": 0.10178494267165661, "rewards/frontier_coverage_15": 0.10178494267165661, "rewards/frontier_coverage_20": 0.10178494267165661, "rewards/frontier_coverage_25": 0.10178494267165661, "rewards/frontier_coverage_5": 0.10178494267165661, "rewards/frontier_ece_reward": 0.009860872942954301, "rewards/frontier_entropy_batch_reward": -0.12810986787080764, "signal/accuracy_reward/centered_abs_mean": 0.12548828125, "signal/accuracy_reward/group_bin_occupancy": 0.18203125, "signal/accuracy_reward/group_std_mean": 0.16264511346817018, "signal/accuracy_reward/group_zero_std_frac": 0.54375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.062744140625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.062744140625, "signal/advantage_abs_mean": 0.07850312739610672, "signal/advantage_pre_scale_abs_mean": 0.07850312739610672, "signal/advantage_pre_scale_std": 0.12315381318330765, "signal/advantage_std": 0.12315381318330765, "signal/brier_reward/centered_abs_mean": 0.1579117149114609, "signal/brier_reward/group_bin_occupancy": 0.8765625, "signal/brier_reward/group_std_mean": 0.19992058277130126, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015791171602904795, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.015791171602904795, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.013566328212618828, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.938671875, "signal/confidence_uniqueness_reward/group_std_mean": 0.019081654772162438, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001356632891111076, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001356632891111076, "signal/format_reward/centered_abs_mean": 0.00150146484375, "signal/format_reward/group_bin_occupancy": 0.127734375, "signal/format_reward/group_std_mean": 0.0040831089485436674, "signal/format_reward/group_zero_std_frac": 0.978125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000750732421875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000750732421875, "signal/frontier_aurc_reward/centered_abs_mean": 0.004184631397947669, "signal/frontier_aurc_reward/group_bin_occupancy": 0.715234375, "signal/frontier_aurc_reward/group_std_mean": 0.006817103549838066, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.230789611232467e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.230789611232467e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.1897767126560211, "signal/frontier_coverage_0/group_bin_occupancy": 0.87734375, "signal/frontier_coverage_0/group_std_mean": 0.24595032632350922, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00237220898270607, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00237220898270607, "signal/frontier_coverage_1/centered_abs_mean": 0.1897767126560211, "signal/frontier_coverage_1/group_bin_occupancy": 0.87734375, "signal/frontier_coverage_1/group_std_mean": 0.24595032632350922, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00237220898270607, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00237220898270607, "signal/frontier_coverage_10/centered_abs_mean": 0.1897767126560211, "signal/frontier_coverage_10/group_bin_occupancy": 0.87734375, "signal/frontier_coverage_10/group_std_mean": 0.24595032632350922, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00237220898270607, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00237220898270607, "signal/frontier_coverage_15/centered_abs_mean": 0.1897767126560211, "signal/frontier_coverage_15/group_bin_occupancy": 0.87734375, "signal/frontier_coverage_15/group_std_mean": 0.24595032632350922, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00237220898270607, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00237220898270607, "signal/frontier_coverage_20/centered_abs_mean": 0.1897767126560211, "signal/frontier_coverage_20/group_bin_occupancy": 0.87734375, "signal/frontier_coverage_20/group_std_mean": 0.24595032632350922, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00237220898270607, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00237220898270607, "signal/frontier_coverage_25/centered_abs_mean": 0.1897767126560211, "signal/frontier_coverage_25/group_bin_occupancy": 0.87734375, "signal/frontier_coverage_25/group_std_mean": 0.24595032632350922, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00237220898270607, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00237220898270607, "signal/frontier_coverage_5/centered_abs_mean": 0.1897767126560211, "signal/frontier_coverage_5/group_bin_occupancy": 0.87734375, "signal/frontier_coverage_5/group_std_mean": 0.24595032632350922, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00237220898270607, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00237220898270607, "signal/frontier_ece_reward/centered_abs_mean": 0.018028103187680243, "signal/frontier_ece_reward/group_bin_occupancy": 0.69921875, "signal/frontier_ece_reward/group_std_mean": 0.023144710436463357, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0018028103280812503, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0018028103280812503, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.18568139374256135, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.759375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.24778930544853212, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.018568138778209686, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.018568138778209686, "step": 125 }, { "calibration/aurc": 0.32245475702575754, "calibration/batch_distribution_entropy": 0.978982327771489, "calibration/batch_entropy_100bins": 0.9670318034930006, "calibration/batch_entropy_10bins": 0.978982327771489, "calibration/batch_entropy_50bins": 0.9769518685366212, "calibration/batch_uniqueness": 0.9534027099609375, "calibration/buffer_distribution_entropy": 0.999017226336244, "calibration/buffer_entropy_100bins": 0.9933569492297052, "calibration/buffer_entropy_10bins": 0.999017226336244, "calibration/buffer_entropy_50bins": 0.997349282060987, "calibration/confidence_entropy": 0.5102350146041745, "calibration/coverage@0%": 0.006640625, "calibration/coverage@1%": 0.006640625, "calibration/coverage@10%": 0.022265625, "calibration/coverage@15%": 0.065625, "calibration/coverage@20%": 0.127734375, "calibration/coverage@25%": 0.27734375, "calibration/coverage@30%": 0.471484375, "calibration/coverage@5%": 0.014453125, "calibration/ece": 0.10677393080062363, "calibration/mean_confidence": 0.51815231260255, "calibration/prompt_uniqueness": 0.870751953125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 960.8, "completions/max_terminated_length": 752.4, "completions/mean_length": 198.9875, "completions/mean_terminated_length": 198.72580261230468, "completions/min_length": 92.6, "completions/min_terminated_length": 92.6, "epoch": 0.416, "grad_norm": 0.0009462664602324367, "learning_rate": 1e-06, "loss": 0.0011, "num_tokens": 437089206.0, "reward": 0.9285161018371582, "reward_std": 0.09302979856729507, "rewards/accuracy_reward": 0.51337890625, "rewards/brier_reward": 0.7765425562858581, "rewards/confidence_uniqueness_reward": 0.953677773475647, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.0032080319710075856, "rewards/frontier_coverage_0": 0.10404052138328553, "rewards/frontier_coverage_1": 0.10404052138328553, "rewards/frontier_coverage_10": 0.10404052138328553, "rewards/frontier_coverage_15": 0.10404052138328553, "rewards/frontier_coverage_20": 0.10404052138328553, "rewards/frontier_coverage_25": 0.10326478481292725, "rewards/frontier_coverage_5": 0.10404052138328553, "rewards/frontier_ece_reward": 0.009166896902024746, "rewards/frontier_entropy_batch_reward": -0.11019333750009537, "signal/accuracy_reward/centered_abs_mean": 0.115802001953125, "signal/accuracy_reward/group_bin_occupancy": 0.17890625, "signal/accuracy_reward/group_std_mean": 0.151393261551857, "signal/accuracy_reward/group_zero_std_frac": 0.56875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0579010009765625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0579010009765625, "signal/advantage_abs_mean": 0.07270590215921402, "signal/advantage_pre_scale_abs_mean": 0.07270590215921402, "signal/advantage_pre_scale_std": 0.1149192675948143, "signal/advantage_std": 0.1149192675948143, "signal/brier_reward/centered_abs_mean": 0.1494935095310211, "signal/brier_reward/group_bin_occupancy": 0.863671875, "signal/brier_reward/group_std_mean": 0.18933481872081756, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01494935117661953, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01494935117661953, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012433665059506892, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.948828125, "signal/confidence_uniqueness_reward/group_std_mean": 0.016522933915257453, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001243366557173431, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001243366557173431, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_bin_occupancy": 0.126171875, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.003389076329767704, "signal/frontier_aurc_reward/group_bin_occupancy": 0.70703125, "signal/frontier_aurc_reward/group_std_mean": 0.005695812962949276, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.2363452666904775e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.2363452666904775e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.19482134580612182, "signal/frontier_coverage_0/group_bin_occupancy": 0.887109375, "signal/frontier_coverage_0/group_std_mean": 0.24719403684139252, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0024352668784558772, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0024352668784558772, "signal/frontier_coverage_1/centered_abs_mean": 0.19482134580612182, "signal/frontier_coverage_1/group_bin_occupancy": 0.887109375, "signal/frontier_coverage_1/group_std_mean": 0.24719403684139252, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024352668784558772, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024352668784558772, "signal/frontier_coverage_10/centered_abs_mean": 0.19482134580612182, "signal/frontier_coverage_10/group_bin_occupancy": 0.887109375, "signal/frontier_coverage_10/group_std_mean": 0.24719403684139252, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024352668784558772, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024352668784558772, "signal/frontier_coverage_15/centered_abs_mean": 0.19482134580612182, "signal/frontier_coverage_15/group_bin_occupancy": 0.887109375, "signal/frontier_coverage_15/group_std_mean": 0.24719403684139252, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024352668784558772, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024352668784558772, "signal/frontier_coverage_20/centered_abs_mean": 0.19482134580612182, "signal/frontier_coverage_20/group_bin_occupancy": 0.887109375, "signal/frontier_coverage_20/group_std_mean": 0.24719403684139252, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0024352668784558772, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0024352668784558772, "signal/frontier_coverage_25/centered_abs_mean": 0.19220769107341767, "signal/frontier_coverage_25/group_bin_occupancy": 0.885546875, "signal/frontier_coverage_25/group_std_mean": 0.2439906269311905, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0024025961756706238, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0024025961756706238, "signal/frontier_coverage_5/centered_abs_mean": 0.19482134580612182, "signal/frontier_coverage_5/group_bin_occupancy": 0.887109375, "signal/frontier_coverage_5/group_std_mean": 0.24719403684139252, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024352668784558772, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024352668784558772, "signal/frontier_ece_reward/centered_abs_mean": 0.015177857503294945, "signal/frontier_ece_reward/group_bin_occupancy": 0.70546875, "signal/frontier_ece_reward/group_std_mean": 0.019421032071113585, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0015177857596427203, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0015177857596427203, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.17526901960372926, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.784765625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.2358124166727066, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.017526903189718723, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.017526903189718723, "step": 130 }, { "calibration/aurc": 0.27758967566370596, "calibration/batch_distribution_entropy": 0.9851777728674534, "calibration/batch_entropy_100bins": 0.9700682358713884, "calibration/batch_entropy_10bins": 0.9851777728674534, "calibration/batch_entropy_50bins": 0.9804977790517506, "calibration/batch_uniqueness": 0.9554472278853534, "calibration/buffer_distribution_entropy": 0.9991109222945178, "calibration/buffer_entropy_100bins": 0.9936594928536131, "calibration/buffer_entropy_10bins": 0.9991109222945178, "calibration/buffer_entropy_50bins": 0.9975580426679944, "calibration/confidence_entropy": 0.4774470200653253, "calibration/coverage@0%": 0.003912365459882583, "calibration/coverage@1%": 0.003912365459882583, "calibration/coverage@10%": 0.07587756849315068, "calibration/coverage@15%": 0.16028620352250486, "calibration/coverage@20%": 0.3328346685420744, "calibration/coverage@25%": 0.4513515166340508, "calibration/coverage@30%": 0.5909407106164384, "calibration/coverage@5%": 0.003912365459882583, "calibration/ece": 0.10711158925252093, "calibration/mean_confidence": 0.5236740097988279, "calibration/prompt_uniqueness": 0.8622971676638918, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 750.4, "completions/max_terminated_length": 555.0, "completions/mean_length": 195.87373046875, "completions/mean_terminated_length": 195.74309997558595, "completions/min_length": 94.8, "completions/min_terminated_length": 94.8, "epoch": 0.432, "grad_norm": 0.0009695117478258908, "learning_rate": 1e-06, "loss": 0.0008, "num_tokens": 454109289.0, "reward": 0.9475380301475524, "reward_std": 0.08842353671789169, "rewards/accuracy_reward": 0.54765625, "rewards/brier_reward": 0.7932500720024109, "rewards/confidence_uniqueness_reward": 0.9542429208755493, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.0026097355876117945, "rewards/frontier_coverage_0": 0.10053790956735612, "rewards/frontier_coverage_1": 0.10053790956735612, "rewards/frontier_coverage_10": 0.10053790956735612, "rewards/frontier_coverage_15": 0.10053790956735612, "rewards/frontier_coverage_20": 0.10053790956735612, "rewards/frontier_coverage_25": 0.0982507586479187, "rewards/frontier_coverage_5": 0.10053790956735612, "rewards/frontier_ece_reward": 0.009860789589583873, "rewards/frontier_entropy_batch_reward": -0.10614849850535393, "signal/accuracy_reward/centered_abs_mean": 0.11329345703125, "signal/accuracy_reward/group_bin_occupancy": 0.176953125, "signal/accuracy_reward/group_std_mean": 0.14613474607467652, "signal/accuracy_reward/group_zero_std_frac": 0.584375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.056646728515625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.056646728515625, "signal/advantage_abs_mean": 0.06929384395480156, "signal/advantage_pre_scale_abs_mean": 0.06929384395480156, "signal/advantage_pre_scale_std": 0.11137249916791916, "signal/advantage_std": 0.11137249916791916, "signal/brier_reward/centered_abs_mean": 0.1398220479488373, "signal/brier_reward/group_bin_occupancy": 0.830078125, "signal/brier_reward/group_std_mean": 0.17961786389350892, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013982205092906952, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013982205092906952, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.011601312272250652, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.953515625, "signal/confidence_uniqueness_reward/group_std_mean": 0.015428530983626842, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001160131278447807, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001160131278447807, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_bin_occupancy": 0.126171875, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.003001504810526967, "signal/frontier_aurc_reward/group_bin_occupancy": 0.684375, "signal/frontier_aurc_reward/group_std_mean": 0.005159206409007311, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.751881013158709e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.751881013158709e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.18813469707965852, "signal/frontier_coverage_0/group_bin_occupancy": 0.882421875, "signal/frontier_coverage_0/group_std_mean": 0.23900977075099944, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0023516837507486345, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023516837507486345, "signal/frontier_coverage_1/centered_abs_mean": 0.18813469707965852, "signal/frontier_coverage_1/group_bin_occupancy": 0.882421875, "signal/frontier_coverage_1/group_std_mean": 0.23900977075099944, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023516837507486345, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023516837507486345, "signal/frontier_coverage_10/centered_abs_mean": 0.18813469707965852, "signal/frontier_coverage_10/group_bin_occupancy": 0.882421875, "signal/frontier_coverage_10/group_std_mean": 0.23900977075099944, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023516837507486345, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023516837507486345, "signal/frontier_coverage_15/centered_abs_mean": 0.18813469707965852, "signal/frontier_coverage_15/group_bin_occupancy": 0.882421875, "signal/frontier_coverage_15/group_std_mean": 0.23900977075099944, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023516837507486345, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023516837507486345, "signal/frontier_coverage_20/centered_abs_mean": 0.18813469707965852, "signal/frontier_coverage_20/group_bin_occupancy": 0.882421875, "signal/frontier_coverage_20/group_std_mean": 0.23900977075099944, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0023516837507486345, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0023516837507486345, "signal/frontier_coverage_25/centered_abs_mean": 0.18071556687355042, "signal/frontier_coverage_25/group_bin_occupancy": 0.8796875, "signal/frontier_coverage_25/group_std_mean": 0.22983711957931519, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0022589446045458315, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022589446045458315, "signal/frontier_coverage_5/centered_abs_mean": 0.18813469707965852, "signal/frontier_coverage_5/group_bin_occupancy": 0.882421875, "signal/frontier_coverage_5/group_std_mean": 0.23900977075099944, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023516837507486345, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023516837507486345, "signal/frontier_ece_reward/centered_abs_mean": 0.013565455190837383, "signal/frontier_ece_reward/group_bin_occupancy": 0.702734375, "signal/frontier_ece_reward/group_std_mean": 0.01733413077890873, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0013565455097705125, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0013565455097705125, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.1644774168729782, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.791015625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.21864676475524902, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.016447741910815238, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.016447741910815238, "step": 135 }, { "calibration/aurc": 0.3032347508308273, "calibration/batch_distribution_entropy": 0.9762598969677517, "calibration/batch_entropy_100bins": 0.9644103028515179, "calibration/batch_entropy_10bins": 0.9762598969677517, "calibration/batch_entropy_50bins": 0.9749709313958501, "calibration/batch_uniqueness": 0.953617455405724, "calibration/buffer_distribution_entropy": 0.9986577848398988, "calibration/buffer_entropy_100bins": 0.993577532222066, "calibration/buffer_entropy_10bins": 0.9986577848398988, "calibration/buffer_entropy_50bins": 0.9973486915751801, "calibration/confidence_entropy": 0.49841096598646917, "calibration/coverage@0%": 0.007428754892367906, "calibration/coverage@1%": 0.007428754892367906, "calibration/coverage@10%": 0.07159215998043053, "calibration/coverage@15%": 0.145873593444227, "calibration/coverage@20%": 0.18615230552837575, "calibration/coverage@25%": 0.31355109466731895, "calibration/coverage@30%": 0.4327329990215264, "calibration/coverage@5%": 0.007428754892367906, "calibration/ece": 0.12669799261164544, "calibration/mean_confidence": 0.5609690276065933, "calibration/prompt_uniqueness": 0.8721019669452394, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 706.4, "completions/max_terminated_length": 494.0, "completions/mean_length": 200.08984375, "completions/mean_terminated_length": 199.95912170410156, "completions/min_length": 90.4, "completions/min_terminated_length": 90.4, "epoch": 0.448, "grad_norm": 0.0010598563821986318, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 471111009.0, "reward": 0.9310909390449524, "reward_std": 0.08768343180418015, "rewards/accuracy_reward": 0.51494140625, "rewards/brier_reward": 0.7858774423599243, "rewards/confidence_uniqueness_reward": 0.9541089057922363, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.003412813926115632, "rewards/frontier_coverage_0": 0.11701681315898896, "rewards/frontier_coverage_1": 0.11701681315898896, "rewards/frontier_coverage_10": 0.11701681315898896, "rewards/frontier_coverage_15": 0.11701681315898896, "rewards/frontier_coverage_20": 0.11701681315898896, "rewards/frontier_coverage_25": 0.11449546068906784, "rewards/frontier_coverage_5": 0.11701681315898896, "rewards/frontier_ece_reward": 0.007894697599112988, "rewards/frontier_entropy_batch_reward": -0.11235017627477646, "signal/accuracy_reward/centered_abs_mean": 0.107989501953125, "signal/accuracy_reward/group_bin_occupancy": 0.1734375, "signal/accuracy_reward/group_std_mean": 0.13963166922330855, "signal/accuracy_reward/group_zero_std_frac": 0.6125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0539947509765625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0539947509765625, "signal/advantage_abs_mean": 0.0684954434633255, "signal/advantage_pre_scale_abs_mean": 0.0684954434633255, "signal/advantage_pre_scale_std": 0.11005422621965408, "signal/advantage_std": 0.11005422621965408, "signal/brier_reward/centered_abs_mean": 0.1446717381477356, "signal/brier_reward/group_bin_occupancy": 0.853515625, "signal/brier_reward/group_std_mean": 0.18473469018936156, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014467174373567104, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014467174373567104, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012451625987887382, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.9578125, "signal/confidence_uniqueness_reward/group_std_mean": 0.016036957129836084, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001245162612758577, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001245162612758577, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_bin_occupancy": 0.12578125, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0035400362219661476, "signal/frontier_aurc_reward/group_bin_occupancy": 0.688671875, "signal/frontier_aurc_reward/group_std_mean": 0.006115310266613961, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.425045408424921e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.425045408424921e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.18895358443260193, "signal/frontier_coverage_0/group_bin_occupancy": 0.88828125, "signal/frontier_coverage_0/group_std_mean": 0.2398217111825943, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0023619199171662332, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023619199171662332, "signal/frontier_coverage_1/centered_abs_mean": 0.18895358443260193, "signal/frontier_coverage_1/group_bin_occupancy": 0.88828125, "signal/frontier_coverage_1/group_std_mean": 0.2398217111825943, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023619199171662332, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023619199171662332, "signal/frontier_coverage_10/centered_abs_mean": 0.18895358443260193, "signal/frontier_coverage_10/group_bin_occupancy": 0.88828125, "signal/frontier_coverage_10/group_std_mean": 0.2398217111825943, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023619199171662332, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023619199171662332, "signal/frontier_coverage_15/centered_abs_mean": 0.18895358443260193, "signal/frontier_coverage_15/group_bin_occupancy": 0.88828125, "signal/frontier_coverage_15/group_std_mean": 0.2398217111825943, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023619199171662332, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023619199171662332, "signal/frontier_coverage_20/centered_abs_mean": 0.18895358443260193, "signal/frontier_coverage_20/group_bin_occupancy": 0.88828125, "signal/frontier_coverage_20/group_std_mean": 0.2398217111825943, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0023619199171662332, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0023619199171662332, "signal/frontier_coverage_25/centered_abs_mean": 0.1779042273759842, "signal/frontier_coverage_25/group_bin_occupancy": 0.884375, "signal/frontier_coverage_25/group_std_mean": 0.2262539952993393, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0022238029167056085, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022238029167056085, "signal/frontier_coverage_5/centered_abs_mean": 0.18895358443260193, "signal/frontier_coverage_5/group_bin_occupancy": 0.88828125, "signal/frontier_coverage_5/group_std_mean": 0.2398217111825943, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023619199171662332, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023619199171662332, "signal/frontier_ece_reward/centered_abs_mean": 0.012882906198501586, "signal/frontier_ece_reward/group_bin_occupancy": 0.704296875, "signal/frontier_ece_reward/group_std_mean": 0.016543524339795113, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001288290647789836, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001288290647789836, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.17680651247501372, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7890625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.23934744596481322, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01768065169453621, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01768065169453621, "step": 140 }, { "calibration/aurc": 0.4091879941407811, "calibration/batch_distribution_entropy": 0.9838473609296932, "calibration/batch_entropy_100bins": 0.9688931964996506, "calibration/batch_entropy_10bins": 0.9838473609296932, "calibration/batch_entropy_50bins": 0.978368933561738, "calibration/batch_uniqueness": 0.954656406723646, "calibration/buffer_distribution_entropy": 0.9980712456007629, "calibration/buffer_entropy_100bins": 0.9934814520723938, "calibration/buffer_entropy_10bins": 0.9980712456007629, "calibration/buffer_entropy_50bins": 0.9970431565305529, "calibration/confidence_entropy": 0.5096224146267194, "calibration/coverage@0%": 0.009782472492421626, "calibration/coverage@1%": 0.009782472492421626, "calibration/coverage@10%": 0.010955879355166724, "calibration/coverage@15%": 0.010955879355166724, "calibration/coverage@20%": 0.06653339161006869, "calibration/coverage@25%": 0.12958179547503934, "calibration/coverage@30%": 0.2461504229260197, "calibration/coverage@5%": 0.009782472492421626, "calibration/ece": 0.1322591503385764, "calibration/mean_confidence": 0.4909400675946268, "calibration/prompt_uniqueness": 0.8706819881633715, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00048828125, "completions/max_length": 1454.8, "completions/max_terminated_length": 715.6, "completions/mean_length": 202.88876953125, "completions/mean_terminated_length": 202.2366149902344, "completions/min_length": 90.8, "completions/min_terminated_length": 90.8, "epoch": 0.464, "grad_norm": 0.0008505330188199878, "learning_rate": 1e-06, "loss": 0.0013, "num_tokens": 488359406.0, "reward": 0.9026776432991028, "reward_std": 0.0831810936331749, "rewards/accuracy_reward": 0.46337890625, "rewards/brier_reward": 0.7662590742111206, "rewards/confidence_uniqueness_reward": 0.9531535387039185, "rewards/format_reward": 0.9994140625, "rewards/frontier_aurc_reward": -0.0037991167046129704, "rewards/frontier_coverage_0": 0.13223221004009247, "rewards/frontier_coverage_1": 0.13223221004009247, "rewards/frontier_coverage_10": 0.13223221004009247, "rewards/frontier_coverage_15": 0.13223221004009247, "rewards/frontier_coverage_20": 0.13223221004009247, "rewards/frontier_coverage_25": 0.12470296993851662, "rewards/frontier_coverage_5": 0.13223221004009247, "rewards/frontier_ece_reward": 0.0060929754748940464, "rewards/frontier_entropy_batch_reward": -0.12698111385107042, "signal/accuracy_reward/centered_abs_mean": 0.086773681640625, "signal/accuracy_reward/group_bin_occupancy": 0.168359375, "signal/accuracy_reward/group_std_mean": 0.11853125244379044, "signal/accuracy_reward/group_zero_std_frac": 0.653125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0433868408203125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0433868408203125, "signal/advantage_abs_mean": 0.06301689743995667, "signal/advantage_pre_scale_abs_mean": 0.06301689743995667, "signal/advantage_pre_scale_std": 0.10428053140640259, "signal/advantage_std": 0.10428053140640259, "signal/brier_reward/centered_abs_mean": 0.14725424647331237, "signal/brier_reward/group_bin_occupancy": 0.86015625, "signal/brier_reward/group_std_mean": 0.18804004192352294, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014725425094366074, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014725425094366074, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.013746090233325958, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.936328125, "signal/confidence_uniqueness_reward/group_std_mean": 0.01900733485817909, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013746090233325958, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013746090233325958, "signal/format_reward/centered_abs_mean": 0.00113525390625, "signal/format_reward/group_bin_occupancy": 0.12734375, "signal/format_reward/group_std_mean": 0.0033145629800856113, "signal/format_reward/group_zero_std_frac": 0.98125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000567626953125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000567626953125, "signal/frontier_aurc_reward/centered_abs_mean": 0.0034574420657008885, "signal/frontier_aurc_reward/group_bin_occupancy": 0.702734375, "signal/frontier_aurc_reward/group_std_mean": 0.005849315505474806, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.3218026257818563e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.3218026257818563e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.18060636520385742, "signal/frontier_coverage_0/group_bin_occupancy": 0.8921875, "signal/frontier_coverage_0/group_std_mean": 0.2308868497610092, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0022575796116143464, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022575796116143464, "signal/frontier_coverage_1/centered_abs_mean": 0.18060636520385742, "signal/frontier_coverage_1/group_bin_occupancy": 0.8921875, "signal/frontier_coverage_1/group_std_mean": 0.2308868497610092, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022575796116143464, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022575796116143464, "signal/frontier_coverage_10/centered_abs_mean": 0.18060636520385742, "signal/frontier_coverage_10/group_bin_occupancy": 0.8921875, "signal/frontier_coverage_10/group_std_mean": 0.2308868497610092, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022575796116143464, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022575796116143464, "signal/frontier_coverage_15/centered_abs_mean": 0.18060636520385742, "signal/frontier_coverage_15/group_bin_occupancy": 0.8921875, "signal/frontier_coverage_15/group_std_mean": 0.2308868497610092, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0022575796116143464, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022575796116143464, "signal/frontier_coverage_20/centered_abs_mean": 0.18060636520385742, "signal/frontier_coverage_20/group_bin_occupancy": 0.8921875, "signal/frontier_coverage_20/group_std_mean": 0.2308868497610092, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0022575796116143464, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0022575796116143464, "signal/frontier_coverage_25/centered_abs_mean": 0.16891390979290008, "signal/frontier_coverage_25/group_bin_occupancy": 0.887890625, "signal/frontier_coverage_25/group_std_mean": 0.21626182198524474, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0021114239003509282, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0021114239003509282, "signal/frontier_coverage_5/centered_abs_mean": 0.18060636520385742, "signal/frontier_coverage_5/group_bin_occupancy": 0.8921875, "signal/frontier_coverage_5/group_std_mean": 0.2308868497610092, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0022575796116143464, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022575796116143464, "signal/frontier_ece_reward/centered_abs_mean": 0.011543591693043708, "signal/frontier_ece_reward/group_bin_occupancy": 0.698828125, "signal/frontier_ece_reward/group_std_mean": 0.015037482790648937, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0011543591972440481, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0011543591972440481, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.18974616825580598, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.75390625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.25198211073875426, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01897461786866188, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01897461786866188, "step": 145 }, { "calibration/aurc": 0.29942965486469897, "calibration/batch_distribution_entropy": 0.9821019550872274, "calibration/batch_entropy_100bins": 0.9658469089767218, "calibration/batch_entropy_10bins": 0.9821019550872274, "calibration/batch_entropy_50bins": 0.9783809273107217, "calibration/batch_uniqueness": 0.9548924237184917, "calibration/buffer_distribution_entropy": 0.9978537146074318, "calibration/buffer_entropy_100bins": 0.9935547771121023, "calibration/buffer_entropy_10bins": 0.9978537146074318, "calibration/buffer_entropy_50bins": 0.9969228501511747, "calibration/confidence_entropy": 0.49102090478939714, "calibration/coverage@0%": 0.018359375, "calibration/coverage@1%": 0.018359375, "calibration/coverage@10%": 0.045703125, "calibration/coverage@15%": 0.078515625, "calibration/coverage@20%": 0.3109375, "calibration/coverage@25%": 0.383984375, "calibration/coverage@30%": 0.506640625, "calibration/coverage@5%": 0.019921875, "calibration/ece": 0.13898620416440183, "calibration/mean_confidence": 0.500086290529229, "calibration/prompt_uniqueness": 0.8661629731399584, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00078125, "completions/max_length": 1326.0, "completions/max_terminated_length": 534.4, "completions/mean_length": 200.81513671875, "completions/mean_terminated_length": 199.7722595214844, "completions/min_length": 92.4, "completions/min_terminated_length": 92.4, "epoch": 0.48, "grad_norm": 0.00103501055855304, "learning_rate": 1e-06, "loss": 0.0021, "num_tokens": 505463785.0, "reward": 0.9328296899795532, "reward_std": 0.08904698044061661, "rewards/accuracy_reward": 0.5251953125, "rewards/brier_reward": 0.7724361300468445, "rewards/confidence_uniqueness_reward": 0.9536016345024109, "rewards/format_reward": 0.99912109375, "rewards/frontier_aurc_reward": -0.002996008098125458, "rewards/frontier_coverage_0": 0.10110445320606232, "rewards/frontier_coverage_1": 0.10110445320606232, "rewards/frontier_coverage_10": 0.10110445320606232, "rewards/frontier_coverage_15": 0.10110445320606232, "rewards/frontier_coverage_20": 0.10104168802499772, "rewards/frontier_coverage_25": 0.09731045961380005, "rewards/frontier_coverage_5": 0.10110445320606232, "rewards/frontier_ece_reward": 0.006894841138273477, "rewards/frontier_entropy_batch_reward": -0.11382773965597152, "signal/accuracy_reward/centered_abs_mean": 0.1135009765625, "signal/accuracy_reward/group_bin_occupancy": 0.176953125, "signal/accuracy_reward/group_std_mean": 0.1488563358783722, "signal/accuracy_reward/group_zero_std_frac": 0.584375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05675048828125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05675048828125, "signal/advantage_abs_mean": 0.06805618703365326, "signal/advantage_pre_scale_abs_mean": 0.06805618703365326, "signal/advantage_pre_scale_std": 0.11172468066215516, "signal/advantage_std": 0.11172468066215516, "signal/brier_reward/centered_abs_mean": 0.14411163926124573, "signal/brier_reward/group_bin_occupancy": 0.846484375, "signal/brier_reward/group_std_mean": 0.1837744355201721, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0144111642614007, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.0144111642614007, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.01357480175793171, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.92734375, "signal/confidence_uniqueness_reward/group_std_mean": 0.01972918212413788, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013574802316725254, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013574802316725254, "signal/format_reward/centered_abs_mean": 0.001702880859375, "signal/format_reward/group_bin_occupancy": 0.128515625, "signal/format_reward/group_std_mean": 0.004971844470128417, "signal/format_reward/group_zero_std_frac": 0.971875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0008514404296875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0008514404296875, "signal/frontier_aurc_reward/centered_abs_mean": 0.002930343523621559, "signal/frontier_aurc_reward/group_bin_occupancy": 0.708984375, "signal/frontier_aurc_reward/group_std_mean": 0.00489910626783967, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.6629295937018466e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.6629295937018466e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.19496614634990692, "signal/frontier_coverage_0/group_bin_occupancy": 0.870703125, "signal/frontier_coverage_0/group_std_mean": 0.24985696673393248, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0024370769038796424, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0024370769038796424, "signal/frontier_coverage_1/centered_abs_mean": 0.19496614634990692, "signal/frontier_coverage_1/group_bin_occupancy": 0.870703125, "signal/frontier_coverage_1/group_std_mean": 0.24985696673393248, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024370769038796424, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024370769038796424, "signal/frontier_coverage_10/centered_abs_mean": 0.19496614634990692, "signal/frontier_coverage_10/group_bin_occupancy": 0.870703125, "signal/frontier_coverage_10/group_std_mean": 0.24985696673393248, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024370769038796424, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024370769038796424, "signal/frontier_coverage_15/centered_abs_mean": 0.19496614634990692, "signal/frontier_coverage_15/group_bin_occupancy": 0.870703125, "signal/frontier_coverage_15/group_std_mean": 0.24985696673393248, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024370769038796424, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024370769038796424, "signal/frontier_coverage_20/centered_abs_mean": 0.1944173663854599, "signal/frontier_coverage_20/group_bin_occupancy": 0.8703125, "signal/frontier_coverage_20/group_std_mean": 0.24920360147953033, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0024302172008901835, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0024302172008901835, "signal/frontier_coverage_25/centered_abs_mean": 0.17979943454265596, "signal/frontier_coverage_25/group_bin_occupancy": 0.86640625, "signal/frontier_coverage_25/group_std_mean": 0.23114030063152313, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0022474929224699735, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022474929224699735, "signal/frontier_coverage_5/centered_abs_mean": 0.19496614634990692, "signal/frontier_coverage_5/group_bin_occupancy": 0.870703125, "signal/frontier_coverage_5/group_std_mean": 0.24985696673393248, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024370769038796424, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024370769038796424, "signal/frontier_ece_reward/centered_abs_mean": 0.011257660388946534, "signal/frontier_ece_reward/group_bin_occupancy": 0.68515625, "signal/frontier_ece_reward/group_std_mean": 0.014584441110491753, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0011257660342380404, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0011257660342380404, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.17291922271251678, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.753125, "signal/frontier_entropy_batch_reward/group_std_mean": 0.23585031032562256, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.017291922867298127, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.017291922867298127, "step": 150 }, { "epoch": 0.48, "eval_calibration/aurc": 0.47709188794813834, "eval_calibration/batch_distribution_entropy": 0.9182471569237116, "eval_calibration/batch_entropy_100bins": 0.6967083197710886, "eval_calibration/batch_entropy_10bins": 0.9182471569237116, "eval_calibration/batch_entropy_50bins": 0.78747214973767, "eval_calibration/batch_uniqueness": 0.8984375, "eval_calibration/buffer_distribution_entropy": 0.9978553291331319, "eval_calibration/buffer_entropy_100bins": 0.9937933422243487, "eval_calibration/buffer_entropy_10bins": 0.9978553291331319, "eval_calibration/buffer_entropy_50bins": 0.9970143809817915, "eval_calibration/confidence_entropy": 0.48359738329176344, "eval_calibration/coverage@0%": 0.03125, "eval_calibration/coverage@1%": 0.03125, "eval_calibration/coverage@10%": 0.03125, "eval_calibration/coverage@15%": 0.0625, "eval_calibration/coverage@20%": 0.0859375, "eval_calibration/coverage@25%": 0.1875, "eval_calibration/coverage@30%": 0.28125, "eval_calibration/coverage@5%": 0.03125, "eval_calibration/ece": 0.19095125563443616, "eval_calibration/mean_confidence": 0.472773278943161, "eval_calibration/prompt_uniqueness": 0.8984375, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 369.75, "eval_completions/max_terminated_length": 369.75, "eval_completions/mean_length": 199.24003219604492, "eval_completions/mean_terminated_length": 199.24003219604492, "eval_completions/min_length": 107.5, "eval_completions/min_terminated_length": 107.5, "eval_loss": 0.0, "eval_num_tokens": 505463785.0, "eval_reward": 0.834217444062233, "eval_reward_std": 0.22967081889510155, "eval_rewards/accuracy_reward": 0.427734375, "eval_rewards/brier_reward": 0.7905289083719254, "eval_rewards/confidence_uniqueness_reward": 0.899658203125, "eval_rewards/format_reward": 1.0, "eval_rewards/frontier_aurc_reward": -0.004185517493169755, "eval_rewards/frontier_coverage_0": 0.17981769144535065, "eval_rewards/frontier_coverage_1": 0.17981769144535065, "eval_rewards/frontier_coverage_10": 0.17981769144535065, "eval_rewards/frontier_coverage_15": 0.17981769144535065, "eval_rewards/frontier_coverage_20": 0.17774366959929466, "eval_rewards/frontier_coverage_25": 0.15828291699290276, "eval_rewards/frontier_coverage_5": 0.17981769144535065, "eval_rewards/frontier_ece_reward": 0.0069712207186967134, "eval_rewards/frontier_entropy_batch_reward": -0.64752197265625, "eval_runtime": 19.8063, "eval_samples_per_second": 25.245, "eval_signal/accuracy_reward/centered_abs_mean": 0.4720458984375, "eval_signal/accuracy_reward/group_bin_occupancy": 0.25, "eval_signal/accuracy_reward/group_std_mean": 0.49310608208179474, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23602294921875, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23602294921875, "eval_signal/advantage_abs_mean": 0.2121136114001274, "eval_signal/advantage_pre_scale_abs_mean": 0.2121136114001274, "eval_signal/advantage_pre_scale_std": 0.22733867913484573, "eval_signal/advantage_std": 0.22733867913484573, "eval_signal/brier_reward/centered_abs_mean": 0.19264057651162148, "eval_signal/brier_reward/group_bin_occupancy": 0.875, "eval_signal/brier_reward/group_std_mean": 0.24780448526144028, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019264057744294405, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.019264057744294405, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.03955078125, "eval_signal/confidence_uniqueness_reward/group_bin_occupancy": 0.375, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.046518636867403984, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003955078369472176, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003955078369472176, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_bin_occupancy": 0.125, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.005617183982394636, "eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.6171875, "eval_signal/frontier_aurc_reward/group_std_mean": 0.011223837500438094, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 7.021479905233718e-05, "eval_signal/frontier_aurc_reward/weight": 0.012500000186264515, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 7.021479905233718e-05, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.34257589280605316, "eval_signal/frontier_coverage_0/group_bin_occupancy": 0.9609375, "eval_signal/frontier_coverage_0/group_std_mean": 0.4243193119764328, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.004282198729924858, "eval_signal/frontier_coverage_0/weight": 0.012500000186264515, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004282198729924858, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.34257589280605316, "eval_signal/frontier_coverage_1/group_bin_occupancy": 0.9609375, "eval_signal/frontier_coverage_1/group_std_mean": 0.4243193119764328, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004282198729924858, "eval_signal/frontier_coverage_1/weight": 0.012500000186264515, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004282198729924858, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.34257589280605316, "eval_signal/frontier_coverage_10/group_bin_occupancy": 0.9609375, "eval_signal/frontier_coverage_10/group_std_mean": 0.4243193119764328, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004282198729924858, "eval_signal/frontier_coverage_10/weight": 0.012500000186264515, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004282198729924858, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.34257589280605316, "eval_signal/frontier_coverage_15/group_bin_occupancy": 0.9609375, "eval_signal/frontier_coverage_15/group_std_mean": 0.4243193119764328, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004282198729924858, "eval_signal/frontier_coverage_15/weight": 0.012500000186264515, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004282198729924858, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.33809472620487213, "eval_signal/frontier_coverage_20/group_bin_occupancy": 0.9609375, "eval_signal/frontier_coverage_20/group_std_mean": 0.41909338533878326, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0042261844500899315, "eval_signal/frontier_coverage_20/weight": 0.012500000186264515, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0042261844500899315, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.30766793340444565, "eval_signal/frontier_coverage_25/group_bin_occupancy": 0.9453125, "eval_signal/frontier_coverage_25/group_std_mean": 0.3833855912089348, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003845849307253957, "eval_signal/frontier_coverage_25/weight": 0.012500000186264515, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003845849307253957, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.34257589280605316, "eval_signal/frontier_coverage_5/group_bin_occupancy": 0.9609375, "eval_signal/frontier_coverage_5/group_std_mean": 0.4243193119764328, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004282198729924858, "eval_signal/frontier_coverage_5/weight": 0.012500000186264515, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004282198729924858, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.013124892022460699, "eval_signal/frontier_ece_reward/group_bin_occupancy": 0.8359375, "eval_signal/frontier_ece_reward/group_std_mean": 0.01785027328878641, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0013124891556799412, "eval_signal/frontier_ece_reward/weight": 0.10000000149011612, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0013124891556799412, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3224372863769531, "eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.25, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.33711399137973785, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03224372863769531, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03224372863769531, "eval_steps_per_second": 0.202, "step": 150 }, { "calibration/aurc": 0.39113271435480473, "calibration/batch_distribution_entropy": 0.9871660215184697, "calibration/batch_entropy_100bins": 0.9700699867201148, "calibration/batch_entropy_10bins": 0.9871660215184697, "calibration/batch_entropy_50bins": 0.9807550994581791, "calibration/batch_uniqueness": 0.9560546875, "calibration/buffer_distribution_entropy": 0.9978919984582358, "calibration/buffer_entropy_100bins": 0.9939673543065268, "calibration/buffer_entropy_10bins": 0.9978919984582358, "calibration/buffer_entropy_50bins": 0.9971106974993134, "calibration/confidence_entropy": 0.4840377475415253, "calibration/coverage@0%": 0.001171875, "calibration/coverage@1%": 0.001171875, "calibration/coverage@10%": 0.082421875, "calibration/coverage@15%": 0.145703125, "calibration/coverage@20%": 0.175390625, "calibration/coverage@25%": 0.234765625, "calibration/coverage@30%": 0.273046875, "calibration/coverage@5%": 0.001171875, "calibration/ece": 0.13277360800522978, "calibration/mean_confidence": 0.5166870016179566, "calibration/prompt_uniqueness": 0.863623046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 548.8, "completions/max_terminated_length": 548.8, "completions/mean_length": 199.92080078125, "completions/mean_terminated_length": 199.92080078125, "completions/min_length": 94.6, "completions/min_terminated_length": 94.6, "epoch": 0.496, "grad_norm": 0.0008479790994897485, "learning_rate": 1e-06, "loss": 0.0001, "num_tokens": 522818814.0, "reward": 0.9452289819717408, "reward_std": 0.08597002923488617, "rewards/accuracy_reward": 0.5462890625, "rewards/brier_reward": 0.779764711856842, "rewards/confidence_uniqueness_reward": 0.9554817199707031, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.003104729810729623, "rewards/frontier_coverage_0": 0.08789373338222503, "rewards/frontier_coverage_1": 0.08789373338222503, "rewards/frontier_coverage_10": 0.08789373338222503, "rewards/frontier_coverage_15": 0.08793908208608628, "rewards/frontier_coverage_20": 0.0870589330792427, "rewards/frontier_coverage_25": 0.07420700564980506, "rewards/frontier_coverage_5": 0.08789373338222503, "rewards/frontier_ece_reward": 0.006637497898191213, "rewards/frontier_entropy_batch_reward": -0.09574897587299347, "signal/accuracy_reward/centered_abs_mean": 0.1052490234375, "signal/accuracy_reward/group_bin_occupancy": 0.17578125, "signal/accuracy_reward/group_std_mean": 0.1405972883105278, "signal/accuracy_reward/group_zero_std_frac": 0.59375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05262451171875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05262451171875, "signal/advantage_abs_mean": 0.06608989387750626, "signal/advantage_pre_scale_abs_mean": 0.06608989387750626, "signal/advantage_pre_scale_std": 0.10801601260900498, "signal/advantage_std": 0.10801601260900498, "signal/brier_reward/centered_abs_mean": 0.13847643435001372, "signal/brier_reward/group_bin_occupancy": 0.8671875, "signal/brier_reward/group_std_mean": 0.17648339569568633, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013847643136978149, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013847643136978149, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.011437726020812989, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.9515625, "signal/confidence_uniqueness_reward/group_std_mean": 0.014382154121994973, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0011437725741416215, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0011437725741416215, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_bin_occupancy": 0.125, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.003300163522362709, "signal/frontier_aurc_reward/group_bin_occupancy": 0.7109375, "signal/frontier_aurc_reward/group_std_mean": 0.005784228537231683, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.125204504816793e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.125204504816793e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.17669629752635957, "signal/frontier_coverage_0/group_bin_occupancy": 0.885546875, "signal/frontier_coverage_0/group_std_mean": 0.2273976117372513, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0022087037097662686, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022087037097662686, "signal/frontier_coverage_1/centered_abs_mean": 0.17669629752635957, "signal/frontier_coverage_1/group_bin_occupancy": 0.885546875, "signal/frontier_coverage_1/group_std_mean": 0.2273976117372513, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022087037097662686, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022087037097662686, "signal/frontier_coverage_10/centered_abs_mean": 0.17669629752635957, "signal/frontier_coverage_10/group_bin_occupancy": 0.885546875, "signal/frontier_coverage_10/group_std_mean": 0.2273976117372513, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022087037097662686, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022087037097662686, "signal/frontier_coverage_15/centered_abs_mean": 0.1764056235551834, "signal/frontier_coverage_15/group_bin_occupancy": 0.885546875, "signal/frontier_coverage_15/group_std_mean": 0.22704406976699829, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0022050703410059214, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022050703410059214, "signal/frontier_coverage_20/centered_abs_mean": 0.17391692698001862, "signal/frontier_coverage_20/group_bin_occupancy": 0.882421875, "signal/frontier_coverage_20/group_std_mean": 0.22390609383583068, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0021739616990089417, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0021739616990089417, "signal/frontier_coverage_25/centered_abs_mean": 0.151907816529274, "signal/frontier_coverage_25/group_bin_occupancy": 0.87578125, "signal/frontier_coverage_25/group_std_mean": 0.19601451158523558, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0018988477066159248, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0018988477066159248, "signal/frontier_coverage_5/centered_abs_mean": 0.17669629752635957, "signal/frontier_coverage_5/group_bin_occupancy": 0.885546875, "signal/frontier_coverage_5/group_std_mean": 0.2273976117372513, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0022087037097662686, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022087037097662686, "signal/frontier_ece_reward/centered_abs_mean": 0.010442128777503968, "signal/frontier_ece_reward/group_bin_occupancy": 0.69296875, "signal/frontier_ece_reward/group_std_mean": 0.013436480797827244, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010442128870636225, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010442128870636225, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.15976455807685852, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.76640625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.21749602854251862, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.015976456366479395, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.015976456366479395, "step": 155 }, { "calibration/aurc": 0.3198817575673089, "calibration/batch_distribution_entropy": 0.9839418406007525, "calibration/batch_entropy_100bins": 0.9679700342611737, "calibration/batch_entropy_10bins": 0.9839418406007525, "calibration/batch_entropy_50bins": 0.9785860503623137, "calibration/batch_uniqueness": 0.954736328125, "calibration/buffer_distribution_entropy": 0.9980732134663842, "calibration/buffer_entropy_100bins": 0.9944633582043301, "calibration/buffer_entropy_10bins": 0.9980732134663842, "calibration/buffer_entropy_50bins": 0.9973456517162692, "calibration/confidence_entropy": 0.5027024811359091, "calibration/coverage@0%": 0.00546875, "calibration/coverage@1%": 0.00546875, "calibration/coverage@10%": 0.10859375, "calibration/coverage@15%": 0.22421875, "calibration/coverage@20%": 0.310546875, "calibration/coverage@25%": 0.423046875, "calibration/coverage@30%": 0.5171875, "calibration/coverage@5%": 0.0515625, "calibration/ece": 0.13456900502830998, "calibration/mean_confidence": 0.5221917640126743, "calibration/prompt_uniqueness": 0.86943359375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 473.6, "completions/max_terminated_length": 473.6, "completions/mean_length": 193.7994140625, "completions/mean_terminated_length": 193.7994140625, "completions/min_length": 90.4, "completions/min_terminated_length": 90.4, "epoch": 0.512, "grad_norm": 0.0009588066022843122, "learning_rate": 1e-06, "loss": 0.0006, "num_tokens": 539948984.0, "reward": 0.9407204031944275, "reward_std": 0.08999814093112946, "rewards/accuracy_reward": 0.536328125, "rewards/brier_reward": 0.7952704071998596, "rewards/confidence_uniqueness_reward": 0.9544906616210938, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.0030694708693772554, "rewards/frontier_coverage_0": 0.10995662808418274, "rewards/frontier_coverage_1": 0.10995662808418274, "rewards/frontier_coverage_10": 0.10985160320997238, "rewards/frontier_coverage_15": 0.10972333252429962, "rewards/frontier_coverage_20": 0.10820228606462479, "rewards/frontier_coverage_25": 0.0877942256629467, "rewards/frontier_coverage_5": 0.10995662808418274, "rewards/frontier_ece_reward": 0.007269245106726885, "rewards/frontier_entropy_batch_reward": -0.12426345646381379, "signal/accuracy_reward/centered_abs_mean": 0.10556640625, "signal/accuracy_reward/group_bin_occupancy": 0.1796875, "signal/accuracy_reward/group_std_mean": 0.14583270251750946, "signal/accuracy_reward/group_zero_std_frac": 0.5625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.052783203125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.052783203125, "signal/advantage_abs_mean": 0.06839020401239396, "signal/advantage_pre_scale_abs_mean": 0.06839020401239396, "signal/advantage_pre_scale_std": 0.11198956221342087, "signal/advantage_std": 0.11198956221342087, "signal/brier_reward/centered_abs_mean": 0.13231053799390793, "signal/brier_reward/group_bin_occupancy": 0.848046875, "signal/brier_reward/group_std_mean": 0.1709081143140793, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013231054134666919, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013231054134666919, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012249898910522462, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.951953125, "signal/confidence_uniqueness_reward/group_std_mean": 0.015168083272874356, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012249899096786975, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012249899096786975, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_bin_occupancy": 0.125, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.003360653715208173, "signal/frontier_aurc_reward/group_bin_occupancy": 0.698046875, "signal/frontier_aurc_reward/group_std_mean": 0.00556328808888793, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.2008173477370295e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.2008173477370295e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.16207623481750488, "signal/frontier_coverage_0/group_bin_occupancy": 0.8796875, "signal/frontier_coverage_0/group_std_mean": 0.21392302811145783, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002025953074917197, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002025953074917197, "signal/frontier_coverage_1/centered_abs_mean": 0.16207623481750488, "signal/frontier_coverage_1/group_bin_occupancy": 0.8796875, "signal/frontier_coverage_1/group_std_mean": 0.21392302811145783, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002025953074917197, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002025953074917197, "signal/frontier_coverage_10/centered_abs_mean": 0.1619580239057541, "signal/frontier_coverage_10/group_bin_occupancy": 0.879296875, "signal/frontier_coverage_10/group_std_mean": 0.2137796849012375, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002024475345388055, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002024475345388055, "signal/frontier_coverage_15/centered_abs_mean": 0.1616723895072937, "signal/frontier_coverage_15/group_bin_occupancy": 0.87890625, "signal/frontier_coverage_15/group_std_mean": 0.21342427134513856, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002020904840901494, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002020904840901494, "signal/frontier_coverage_20/centered_abs_mean": 0.15862617492675782, "signal/frontier_coverage_20/group_bin_occupancy": 0.877734375, "signal/frontier_coverage_20/group_std_mean": 0.20951247215270996, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00198282718192786, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00198282718192786, "signal/frontier_coverage_25/centered_abs_mean": 0.12221903800964355, "signal/frontier_coverage_25/group_bin_occupancy": 0.865234375, "signal/frontier_coverage_25/group_std_mean": 0.1629067152738571, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015277379658073186, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015277379658073186, "signal/frontier_coverage_5/centered_abs_mean": 0.16207623481750488, "signal/frontier_coverage_5/group_bin_occupancy": 0.8796875, "signal/frontier_coverage_5/group_std_mean": 0.21392302811145783, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002025953074917197, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002025953074917197, "signal/frontier_ece_reward/centered_abs_mean": 0.010067025758326053, "signal/frontier_ece_reward/group_bin_occupancy": 0.678125, "signal/frontier_ece_reward/group_std_mean": 0.012916841916739941, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001006702589802444, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001006702589802444, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.19091980755329133, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.772265625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.2507141649723053, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.019091981835663317, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.019091981835663317, "step": 160 }, { "calibration/aurc": 0.21586168859585855, "calibration/batch_distribution_entropy": 0.9889782873586641, "calibration/batch_entropy_100bins": 0.9707927997288179, "calibration/batch_entropy_10bins": 0.9889782873586641, "calibration/batch_entropy_50bins": 0.9823227270977654, "calibration/batch_uniqueness": 0.9561279296875, "calibration/buffer_distribution_entropy": 0.9982977167926123, "calibration/buffer_entropy_100bins": 0.9947302848566546, "calibration/buffer_entropy_10bins": 0.9982977167926123, "calibration/buffer_entropy_50bins": 0.9975194217333142, "calibration/confidence_entropy": 0.49518329062893385, "calibration/coverage@0%": 0.03046875, "calibration/coverage@1%": 0.03046875, "calibration/coverage@10%": 0.2296875, "calibration/coverage@15%": 0.41875, "calibration/coverage@20%": 0.558203125, "calibration/coverage@25%": 0.6453125, "calibration/coverage@30%": 0.741015625, "calibration/coverage@5%": 0.08828125, "calibration/ece": 0.10502446492353879, "calibration/mean_confidence": 0.5066498364850488, "calibration/prompt_uniqueness": 0.856396484375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 458.0, "completions/max_terminated_length": 458.0, "completions/mean_length": 195.665234375, "completions/mean_terminated_length": 195.665234375, "completions/min_length": 91.0, "completions/min_terminated_length": 91.0, "epoch": 0.528, "grad_norm": 0.0008851737948134542, "learning_rate": 1e-06, "loss": 0.001, "num_tokens": 556982132.0, "reward": 0.9444244861602783, "reward_std": 0.08863194286823273, "rewards/accuracy_reward": 0.53828125, "rewards/brier_reward": 0.8043418645858764, "rewards/confidence_uniqueness_reward": 0.9550811767578125, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.0023640868021175265, "rewards/frontier_coverage_0": 0.12184633612632752, "rewards/frontier_coverage_1": 0.12184633612632752, "rewards/frontier_coverage_10": 0.12164630815386772, "rewards/frontier_coverage_15": 0.12158802151679993, "rewards/frontier_coverage_20": 0.1201416477560997, "rewards/frontier_coverage_25": 0.09376933947205543, "rewards/frontier_coverage_5": 0.1217406578361988, "rewards/frontier_ece_reward": 0.006898148078471422, "rewards/frontier_entropy_batch_reward": -0.11600946933031082, "signal/accuracy_reward/centered_abs_mean": 0.1177001953125, "signal/accuracy_reward/group_bin_occupancy": 0.176171875, "signal/accuracy_reward/group_std_mean": 0.15068991482257843, "signal/accuracy_reward/group_zero_std_frac": 0.590625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05885009765625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05885009765625, "signal/advantage_abs_mean": 0.06982930153608322, "signal/advantage_pre_scale_abs_mean": 0.06982930153608322, "signal/advantage_pre_scale_std": 0.11278409063816071, "signal/advantage_std": 0.11278409063816071, "signal/brier_reward/centered_abs_mean": 0.12580630481243132, "signal/brier_reward/group_bin_occupancy": 0.84140625, "signal/brier_reward/group_std_mean": 0.16182839572429658, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012580630742013455, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012580630742013455, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012181806564331054, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.951171875, "signal/confidence_uniqueness_reward/group_std_mean": 0.015199671126902103, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012181806610897183, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012181806610897183, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_bin_occupancy": 0.125, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.002553233178332448, "signal/frontier_aurc_reward/group_bin_occupancy": 0.69921875, "signal/frontier_aurc_reward/group_std_mean": 0.0044392747804522514, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.1915416184347126e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.1915416184347126e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.17886043787002565, "signal/frontier_coverage_0/group_bin_occupancy": 0.880078125, "signal/frontier_coverage_0/group_std_mean": 0.23067601323127745, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0022357555106282236, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022357555106282236, "signal/frontier_coverage_1/centered_abs_mean": 0.17886043787002565, "signal/frontier_coverage_1/group_bin_occupancy": 0.880078125, "signal/frontier_coverage_1/group_std_mean": 0.23067601323127745, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022357555106282236, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022357555106282236, "signal/frontier_coverage_10/centered_abs_mean": 0.1785387396812439, "signal/frontier_coverage_10/group_bin_occupancy": 0.8796875, "signal/frontier_coverage_10/group_std_mean": 0.23026902973651886, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022317342925816776, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022317342925816776, "signal/frontier_coverage_15/centered_abs_mean": 0.17839260399341583, "signal/frontier_coverage_15/group_bin_occupancy": 0.8796875, "signal/frontier_coverage_15/group_std_mean": 0.23008474409580232, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002229907549917698, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002229907549917698, "signal/frontier_coverage_20/centered_abs_mean": 0.1752532511949539, "signal/frontier_coverage_20/group_bin_occupancy": 0.880859375, "signal/frontier_coverage_20/group_std_mean": 0.2261554479598999, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0021906656213104723, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0021906656213104723, "signal/frontier_coverage_25/centered_abs_mean": 0.12653469890356064, "signal/frontier_coverage_25/group_bin_occupancy": 0.869140625, "signal/frontier_coverage_25/group_std_mean": 0.16514583826065063, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015816838014870882, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015816838014870882, "signal/frontier_coverage_5/centered_abs_mean": 0.17872408032417297, "signal/frontier_coverage_5/group_bin_occupancy": 0.880078125, "signal/frontier_coverage_5/group_std_mean": 0.2305031508207321, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002234051004052162, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002234051004052162, "signal/frontier_ece_reward/centered_abs_mean": 0.008623083122074604, "signal/frontier_ece_reward/group_bin_occupancy": 0.68125, "signal/frontier_ece_reward/group_std_mean": 0.011099833622574806, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0008623083238489925, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0008623083238489925, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.1766481250524521, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.75390625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.23794711530208587, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01766481213271618, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01766481213271618, "step": 165 }, { "calibration/aurc": 0.2572418976278044, "calibration/batch_distribution_entropy": 0.9795796288000306, "calibration/batch_entropy_100bins": 0.9608326685871675, "calibration/batch_entropy_10bins": 0.9795796288000306, "calibration/batch_entropy_50bins": 0.9744636836243364, "calibration/batch_uniqueness": 0.957818603515625, "calibration/buffer_distribution_entropy": 0.9980996847351415, "calibration/buffer_entropy_100bins": 0.9946257091132935, "calibration/buffer_entropy_10bins": 0.9980996847351415, "calibration/buffer_entropy_50bins": 0.9973523035428729, "calibration/confidence_entropy": 0.4783709663886726, "calibration/coverage@0%": 0.008984375, "calibration/coverage@1%": 0.008984375, "calibration/coverage@10%": 0.065625, "calibration/coverage@15%": 0.1890625, "calibration/coverage@20%": 0.355859375, "calibration/coverage@25%": 0.54609375, "calibration/coverage@30%": 0.707421875, "calibration/coverage@5%": 0.01171875, "calibration/ece": 0.08335968835444894, "calibration/mean_confidence": 0.5398684844209177, "calibration/prompt_uniqueness": 0.870166015625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 519.6, "completions/max_terminated_length": 519.6, "completions/mean_length": 196.367578125, "completions/mean_terminated_length": 196.367578125, "completions/min_length": 90.6, "completions/min_terminated_length": 90.6, "epoch": 0.544, "grad_norm": 0.000998239149339497, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 574156520.0, "reward": 0.9517637491226196, "reward_std": 0.0945101723074913, "rewards/accuracy_reward": 0.566796875, "rewards/brier_reward": 0.7817894101142884, "rewards/confidence_uniqueness_reward": 0.9573951721191406, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.00291836392134428, "rewards/frontier_coverage_0": 0.0726560816168785, "rewards/frontier_coverage_1": 0.0726560816168785, "rewards/frontier_coverage_10": 0.07252307832241059, "rewards/frontier_coverage_15": 0.0724917471408844, "rewards/frontier_coverage_20": 0.07174690216779708, "rewards/frontier_coverage_25": 0.054031723737716676, "rewards/frontier_coverage_5": 0.0725811704993248, "rewards/frontier_ece_reward": 0.005196140892803669, "rewards/frontier_entropy_batch_reward": -0.12144885808229447, "signal/accuracy_reward/centered_abs_mean": 0.1263427734375, "signal/accuracy_reward/group_bin_occupancy": 0.181640625, "signal/accuracy_reward/group_std_mean": 0.16339680254459382, "signal/accuracy_reward/group_zero_std_frac": 0.546875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06317138671875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06317138671875, "signal/advantage_abs_mean": 0.07346977144479752, "signal/advantage_pre_scale_abs_mean": 0.07346977144479752, "signal/advantage_pre_scale_std": 0.11571350246667862, "signal/advantage_std": 0.11571350246667862, "signal/brier_reward/centered_abs_mean": 0.14408716559410095, "signal/brier_reward/group_bin_occupancy": 0.8609375, "signal/brier_reward/group_std_mean": 0.18343546390533447, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01440871749073267, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01440871749073267, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.01195671558380127, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.95390625, "signal/confidence_uniqueness_reward/group_std_mean": 0.01492820680141449, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00119567159563303, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00119567159563303, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_bin_occupancy": 0.125, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.003072212403640151, "signal/frontier_aurc_reward/group_bin_occupancy": 0.7078125, "signal/frontier_aurc_reward/group_std_mean": 0.005290277022868395, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.8402655627578496e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.8402655627578496e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.19166307151317596, "signal/frontier_coverage_0/group_bin_occupancy": 0.870703125, "signal/frontier_coverage_0/group_std_mean": 0.24394794702529907, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0023957884404808283, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023957884404808283, "signal/frontier_coverage_1/centered_abs_mean": 0.19166307151317596, "signal/frontier_coverage_1/group_bin_occupancy": 0.870703125, "signal/frontier_coverage_1/group_std_mean": 0.24394794702529907, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023957884404808283, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023957884404808283, "signal/frontier_coverage_10/centered_abs_mean": 0.19123572409152984, "signal/frontier_coverage_10/group_bin_occupancy": 0.87109375, "signal/frontier_coverage_10/group_std_mean": 0.2434035360813141, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023904466070234776, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023904466070234776, "signal/frontier_coverage_15/centered_abs_mean": 0.1911354660987854, "signal/frontier_coverage_15/group_bin_occupancy": 0.87109375, "signal/frontier_coverage_15/group_std_mean": 0.24327577650547028, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023891933728009463, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023891933728009463, "signal/frontier_coverage_20/centered_abs_mean": 0.18775070905685426, "signal/frontier_coverage_20/group_bin_occupancy": 0.869140625, "signal/frontier_coverage_20/group_std_mean": 0.23907591700553893, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0023468838538974524, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0023468838538974524, "signal/frontier_coverage_25/centered_abs_mean": 0.12309406846761703, "signal/frontier_coverage_25/group_bin_occupancy": 0.865625, "signal/frontier_coverage_25/group_std_mean": 0.1585517108440399, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015386758605018258, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015386758605018258, "signal/frontier_coverage_5/centered_abs_mean": 0.19142203629016877, "signal/frontier_coverage_5/group_bin_occupancy": 0.871484375, "signal/frontier_coverage_5/group_std_mean": 0.24364092350006103, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002392775472253561, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002392775472253561, "signal/frontier_ece_reward/centered_abs_mean": 0.009025206603109837, "signal/frontier_ece_reward/group_bin_occupancy": 0.69453125, "signal/frontier_ece_reward/group_std_mean": 0.011522319540381432, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009025206556543708, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009025206556543708, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.18435474932193757, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.799609375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.24489886164665223, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.018435475416481495, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.018435475416481495, "step": 170 }, { "calibration/aurc": 0.2701489115748014, "calibration/batch_distribution_entropy": 0.9858005961207958, "calibration/batch_entropy_100bins": 0.9584672863006161, "calibration/batch_entropy_10bins": 0.9858005961207958, "calibration/batch_entropy_50bins": 0.9780489974776383, "calibration/batch_uniqueness": 0.9608780146637634, "calibration/buffer_distribution_entropy": 0.9978735851080804, "calibration/buffer_entropy_100bins": 0.9944173356403502, "calibration/buffer_entropy_10bins": 0.9978735851080804, "calibration/buffer_entropy_50bins": 0.99715421757982, "calibration/confidence_entropy": 0.5013730363986142, "calibration/coverage@0%": 0.04025501467710372, "calibration/coverage@1%": 0.04025501467710372, "calibration/coverage@10%": 0.2434396404109589, "calibration/coverage@15%": 0.28407687133072407, "calibration/coverage@20%": 0.32043480919765166, "calibration/coverage@25%": 0.44272871819960863, "calibration/coverage@30%": 0.5927577666340509, "calibration/coverage@5%": 0.13478932240704503, "calibration/ece": 0.11489932579269604, "calibration/mean_confidence": 0.493834637039139, "calibration/prompt_uniqueness": 0.8709420830352498, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 1088.2, "completions/max_terminated_length": 488.6, "completions/mean_length": 196.847265625, "completions/mean_terminated_length": 196.45567321777344, "completions/min_length": 88.6, "completions/min_terminated_length": 88.6, "epoch": 0.56, "grad_norm": 0.0010303459130227566, "learning_rate": 1e-06, "loss": 0.0013, "num_tokens": 590993644.0, "reward": 0.9366175770759583, "reward_std": 0.08155035525560379, "rewards/accuracy_reward": 0.52333984375, "rewards/brier_reward": 0.7957682371139526, "rewards/confidence_uniqueness_reward": 0.9611407041549682, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.0028076643124222756, "rewards/frontier_coverage_0": 0.1162565752863884, "rewards/frontier_coverage_1": 0.1162565752863884, "rewards/frontier_coverage_10": 0.11595501154661178, "rewards/frontier_coverage_15": 0.11588414609432221, "rewards/frontier_coverage_20": 0.11414353549480438, "rewards/frontier_coverage_25": 0.07903541177511215, "rewards/frontier_coverage_5": 0.1160865843296051, "rewards/frontier_ece_reward": 0.0057329384610056875, "rewards/frontier_entropy_batch_reward": -0.10805188417434693, "signal/accuracy_reward/centered_abs_mean": 0.092449951171875, "signal/accuracy_reward/group_bin_occupancy": 0.173828125, "signal/accuracy_reward/group_std_mean": 0.12797222435474395, "signal/accuracy_reward/group_zero_std_frac": 0.609375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0462249755859375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0462249755859375, "signal/advantage_abs_mean": 0.06160459816455841, "signal/advantage_pre_scale_abs_mean": 0.06160459816455841, "signal/advantage_pre_scale_std": 0.10464294999837875, "signal/advantage_std": 0.10464294999837875, "signal/brier_reward/centered_abs_mean": 0.1313459038734436, "signal/brier_reward/group_bin_occupancy": 0.859375, "signal/brier_reward/group_std_mean": 0.169065198302269, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013134590722620488, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013134590722620488, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012869596667587756, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.9203125, "signal/confidence_uniqueness_reward/group_std_mean": 0.01707104854285717, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012869596714153886, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012869596714153886, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_bin_occupancy": 0.126171875, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.0027754565235227346, "signal/frontier_aurc_reward/group_bin_occupancy": 0.71640625, "signal/frontier_aurc_reward/group_std_mean": 0.004652646463364362, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.469320727162994e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.469320727162994e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.16855145394802093, "signal/frontier_coverage_0/group_bin_occupancy": 0.88828125, "signal/frontier_coverage_0/group_std_mean": 0.21740144789218901, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002106893085874617, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002106893085874617, "signal/frontier_coverage_1/centered_abs_mean": 0.16855145394802093, "signal/frontier_coverage_1/group_bin_occupancy": 0.88828125, "signal/frontier_coverage_1/group_std_mean": 0.21740144789218901, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002106893085874617, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002106893085874617, "signal/frontier_coverage_10/centered_abs_mean": 0.16817551851272583, "signal/frontier_coverage_10/group_bin_occupancy": 0.887890625, "signal/frontier_coverage_10/group_std_mean": 0.21691781878471375, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0021021940745413305, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0021021940745413305, "signal/frontier_coverage_15/centered_abs_mean": 0.16808723509311677, "signal/frontier_coverage_15/group_bin_occupancy": 0.8875, "signal/frontier_coverage_15/group_std_mean": 0.21680429875850676, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0021010904340073465, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0021010904340073465, "signal/frontier_coverage_20/centered_abs_mean": 0.16493748724460602, "signal/frontier_coverage_20/group_bin_occupancy": 0.884765625, "signal/frontier_coverage_20/group_std_mean": 0.2127989798784256, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002061718562617898, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002061718562617898, "signal/frontier_coverage_25/centered_abs_mean": 0.10539229065179825, "signal/frontier_coverage_25/group_bin_occupancy": 0.875390625, "signal/frontier_coverage_25/group_std_mean": 0.13706440329551697, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001317403675056994, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001317403675056994, "signal/frontier_coverage_5/centered_abs_mean": 0.1683394968509674, "signal/frontier_coverage_5/group_bin_occupancy": 0.8875, "signal/frontier_coverage_5/group_std_mean": 0.21712871193885802, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002104243659414351, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002104243659414351, "signal/frontier_ece_reward/centered_abs_mean": 0.007921327650547028, "signal/frontier_ece_reward/group_bin_occupancy": 0.670703125, "signal/frontier_ece_reward/group_std_mean": 0.010234402120113372, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007921327836811542, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007921327836811542, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.1686824917793274, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.777734375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.22734990119934081, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.016868249885737895, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.016868249885737895, "step": 175 }, { "calibration/aurc": 0.30699371259582486, "calibration/batch_distribution_entropy": 0.9854770880284256, "calibration/batch_entropy_100bins": 0.9541744663609115, "calibration/batch_entropy_10bins": 0.9854770880284256, "calibration/batch_entropy_50bins": 0.9757598769843548, "calibration/batch_uniqueness": 0.963922119140625, "calibration/buffer_distribution_entropy": 0.9981526346641395, "calibration/buffer_entropy_100bins": 0.9944422625280505, "calibration/buffer_entropy_10bins": 0.9981526346641395, "calibration/buffer_entropy_50bins": 0.9972570207198382, "calibration/confidence_entropy": 0.496733407619662, "calibration/coverage@0%": 0.0484375, "calibration/coverage@1%": 0.0484375, "calibration/coverage@10%": 0.131640625, "calibration/coverage@15%": 0.21015625, "calibration/coverage@20%": 0.263671875, "calibration/coverage@25%": 0.35703125, "calibration/coverage@30%": 0.519921875, "calibration/coverage@5%": 0.062109375, "calibration/ece": 0.10147968634650846, "calibration/mean_confidence": 0.4753874622329694, "calibration/prompt_uniqueness": 0.86865234375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 514.6, "completions/max_terminated_length": 514.6, "completions/mean_length": 198.885546875, "completions/mean_terminated_length": 198.885546875, "completions/min_length": 98.2, "completions/min_terminated_length": 98.2, "epoch": 0.576, "grad_norm": 0.0010701222345232964, "learning_rate": 1e-06, "loss": 0.0001, "num_tokens": 608216856.0, "reward": 0.9302929162979126, "reward_std": 0.07859764248132706, "rewards/accuracy_reward": 0.51591796875, "rewards/brier_reward": 0.7868143916130066, "rewards/confidence_uniqueness_reward": 0.9633003234863281, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.002910622302442789, "rewards/frontier_coverage_0": 0.1169891744852066, "rewards/frontier_coverage_1": 0.1169891744852066, "rewards/frontier_coverage_10": 0.11675787419080734, "rewards/frontier_coverage_15": 0.1165872111916542, "rewards/frontier_coverage_20": 0.11500040218234062, "rewards/frontier_coverage_25": 0.07931032329797745, "rewards/frontier_coverage_5": 0.1169125959277153, "rewards/frontier_ece_reward": 0.004967722669243812, "rewards/frontier_entropy_batch_reward": -0.12869784384965896, "signal/accuracy_reward/centered_abs_mean": 0.089678955078125, "signal/accuracy_reward/group_bin_occupancy": 0.171875, "signal/accuracy_reward/group_std_mean": 0.12337614744901657, "signal/accuracy_reward/group_zero_std_frac": 0.625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0448394775390625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0448394775390625, "signal/advantage_abs_mean": 0.059406136721372606, "signal/advantage_pre_scale_abs_mean": 0.059406136721372606, "signal/advantage_pre_scale_std": 0.09829565286636352, "signal/advantage_std": 0.09829565286636352, "signal/brier_reward/centered_abs_mean": 0.13098579347133638, "signal/brier_reward/group_bin_occupancy": 0.851171875, "signal/brier_reward/group_std_mean": 0.1678290545940399, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01309858001768589, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01309858001768589, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.013459730148315429, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.890625, "signal/confidence_uniqueness_reward/group_std_mean": 0.017082039453089237, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013459730194881558, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013459730194881558, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_bin_occupancy": 0.125, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0026516387006267905, "signal/frontier_aurc_reward/group_bin_occupancy": 0.708984375, "signal/frontier_aurc_reward/group_std_mean": 0.004528477415442466, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.314548303023912e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.314548303023912e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.17838220298290253, "signal/frontier_coverage_0/group_bin_occupancy": 0.8734375, "signal/frontier_coverage_0/group_std_mean": 0.22681029438972472, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002229777490720153, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002229777490720153, "signal/frontier_coverage_1/centered_abs_mean": 0.17838220298290253, "signal/frontier_coverage_1/group_bin_occupancy": 0.8734375, "signal/frontier_coverage_1/group_std_mean": 0.22681029438972472, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002229777490720153, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002229777490720153, "signal/frontier_coverage_10/centered_abs_mean": 0.17802486717700958, "signal/frontier_coverage_10/group_bin_occupancy": 0.873828125, "signal/frontier_coverage_10/group_std_mean": 0.22636044323444365, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002225310867652297, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002225310867652297, "signal/frontier_coverage_15/centered_abs_mean": 0.17769393920898438, "signal/frontier_coverage_15/group_bin_occupancy": 0.873828125, "signal/frontier_coverage_15/group_std_mean": 0.2259401261806488, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0022211743518710135, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022211743518710135, "signal/frontier_coverage_20/centered_abs_mean": 0.17457264065742492, "signal/frontier_coverage_20/group_bin_occupancy": 0.871484375, "signal/frontier_coverage_20/group_std_mean": 0.22201407551765442, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0021821580128744246, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0021821580128744246, "signal/frontier_coverage_25/centered_abs_mean": 0.11014373302459717, "signal/frontier_coverage_25/group_bin_occupancy": 0.876171875, "signal/frontier_coverage_25/group_std_mean": 0.14102184176445007, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013767967000603675, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013767967000603675, "signal/frontier_coverage_5/centered_abs_mean": 0.1782844364643097, "signal/frontier_coverage_5/group_bin_occupancy": 0.8734375, "signal/frontier_coverage_5/group_std_mean": 0.22668357491493224, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00222855550237, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00222855550237, "signal/frontier_ece_reward/centered_abs_mean": 0.00747135728597641, "signal/frontier_ece_reward/group_bin_occupancy": 0.6625, "signal/frontier_ece_reward/group_std_mean": 0.009620749577879905, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007471357355825603, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007471357355825603, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.1880700945854187, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.773046875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.25184816122055054, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01880700998008251, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01880700998008251, "step": 180 }, { "calibration/aurc": 0.3112719069113949, "calibration/batch_distribution_entropy": 0.9798265397899921, "calibration/batch_entropy_100bins": 0.946650027126187, "calibration/batch_entropy_10bins": 0.9798265397899921, "calibration/batch_entropy_50bins": 0.9711012111621464, "calibration/batch_uniqueness": 0.960726591011013, "calibration/buffer_distribution_entropy": 0.9982804261548217, "calibration/buffer_entropy_100bins": 0.9943895114204372, "calibration/buffer_entropy_10bins": 0.9982804261548217, "calibration/buffer_entropy_50bins": 0.9972331678385433, "calibration/confidence_entropy": 0.48484172667766945, "calibration/coverage@0%": 0.012899033757338552, "calibration/coverage@1%": 0.012899033757338552, "calibration/coverage@10%": 0.20021633439334638, "calibration/coverage@15%": 0.29246346012720154, "calibration/coverage@20%": 0.3956931873776908, "calibration/coverage@25%": 0.49148880870841494, "calibration/coverage@30%": 0.5556078767123288, "calibration/coverage@5%": 0.046944563356164384, "calibration/ece": 0.14819788302443063, "calibration/mean_confidence": 0.4693338775372896, "calibration/prompt_uniqueness": 0.8631444194686525, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 950.8, "completions/max_terminated_length": 725.0, "completions/mean_length": 201.2125, "completions/mean_terminated_length": 200.95231018066406, "completions/min_length": 93.2, "completions/min_terminated_length": 93.2, "epoch": 0.592, "grad_norm": 0.0008901117253117263, "learning_rate": 1e-06, "loss": 0.0002, "num_tokens": 625444984.0, "reward": 0.9317437887191773, "reward_std": 0.0811847597360611, "rewards/accuracy_reward": 0.5265625, "rewards/brier_reward": 0.7796778678894043, "rewards/confidence_uniqueness_reward": 0.9608142495155334, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.002869694633409381, "rewards/frontier_coverage_0": 0.1071198582649231, "rewards/frontier_coverage_1": 0.1071198582649231, "rewards/frontier_coverage_10": 0.10704143643379212, "rewards/frontier_coverage_15": 0.10693599283695221, "rewards/frontier_coverage_20": 0.10569211542606353, "rewards/frontier_coverage_25": 0.06944319903850556, "rewards/frontier_coverage_5": 0.1071198582649231, "rewards/frontier_ece_reward": 0.004962181858718395, "rewards/frontier_entropy_batch_reward": -0.14830300509929656, "signal/accuracy_reward/centered_abs_mean": 0.099169921875, "signal/accuracy_reward/group_bin_occupancy": 0.17109375, "signal/accuracy_reward/group_std_mean": 0.12951190322637557, "signal/accuracy_reward/group_zero_std_frac": 0.63125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0495849609375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0495849609375, "signal/advantage_abs_mean": 0.06339782625436782, "signal/advantage_pre_scale_abs_mean": 0.06339782625436782, "signal/advantage_pre_scale_std": 0.10178755819797516, "signal/advantage_std": 0.10178755819797516, "signal/brier_reward/centered_abs_mean": 0.13208021223545074, "signal/brier_reward/group_bin_occupancy": 0.830859375, "signal/brier_reward/group_std_mean": 0.16981416642665864, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013208021223545075, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013208021223545075, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.016946067102253437, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.83671875, "signal/confidence_uniqueness_reward/group_std_mean": 0.022317757830023766, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0016946068033576011, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0016946068033576011, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_bin_occupancy": 0.12578125, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0027397330850362778, "signal/frontier_aurc_reward/group_bin_occupancy": 0.698828125, "signal/frontier_aurc_reward/group_std_mean": 0.004624523315578699, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.4246665018144994e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.4246665018144994e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.1888021171092987, "signal/frontier_coverage_0/group_bin_occupancy": 0.86796875, "signal/frontier_coverage_0/group_std_mean": 0.23875601589679718, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002360026491805911, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002360026491805911, "signal/frontier_coverage_1/centered_abs_mean": 0.1888021171092987, "signal/frontier_coverage_1/group_bin_occupancy": 0.86796875, "signal/frontier_coverage_1/group_std_mean": 0.23875601589679718, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002360026491805911, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002360026491805911, "signal/frontier_coverage_10/centered_abs_mean": 0.18852269053459167, "signal/frontier_coverage_10/group_bin_occupancy": 0.86796875, "signal/frontier_coverage_10/group_std_mean": 0.23838862776756287, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023565337061882017, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023565337061882017, "signal/frontier_coverage_15/centered_abs_mean": 0.18781451284885406, "signal/frontier_coverage_15/group_bin_occupancy": 0.86875, "signal/frontier_coverage_15/group_std_mean": 0.2375061869621277, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023476815316826106, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023476815316826106, "signal/frontier_coverage_20/centered_abs_mean": 0.18328932523727418, "signal/frontier_coverage_20/group_bin_occupancy": 0.865625, "signal/frontier_coverage_20/group_std_mean": 0.2319197654724121, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002291116584092379, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002291116584092379, "signal/frontier_coverage_25/centered_abs_mean": 0.11159079521894455, "signal/frontier_coverage_25/group_bin_occupancy": 0.878125, "signal/frontier_coverage_25/group_std_mean": 0.14274431467056276, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013948849868029357, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013948849868029357, "signal/frontier_coverage_5/centered_abs_mean": 0.1888021171092987, "signal/frontier_coverage_5/group_bin_occupancy": 0.86796875, "signal/frontier_coverage_5/group_std_mean": 0.23875601589679718, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002360026491805911, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002360026491805911, "signal/frontier_ece_reward/centered_abs_mean": 0.0074934825301170346, "signal/frontier_ece_reward/group_bin_occupancy": 0.656640625, "signal/frontier_ece_reward/group_std_mean": 0.009578016772866248, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007493482902646065, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007493482902646065, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.21184809803962706, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.731640625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.2794174253940582, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.021184809505939484, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.021184809505939484, "step": 185 }, { "calibration/aurc": 0.23472786932091197, "calibration/batch_distribution_entropy": 0.9717881079374342, "calibration/batch_entropy_100bins": 0.946275985669382, "calibration/batch_entropy_10bins": 0.9717881079374342, "calibration/batch_entropy_50bins": 0.9700067463765413, "calibration/batch_uniqueness": 0.9602325439453125, "calibration/buffer_distribution_entropy": 0.9984455204444757, "calibration/buffer_entropy_100bins": 0.9943282725910221, "calibration/buffer_entropy_10bins": 0.9984455204444757, "calibration/buffer_entropy_50bins": 0.9972307313275625, "calibration/confidence_entropy": 0.4705205830282598, "calibration/coverage@0%": 0.00625, "calibration/coverage@1%": 0.00625, "calibration/coverage@10%": 0.258984375, "calibration/coverage@15%": 0.38125, "calibration/coverage@20%": 0.5, "calibration/coverage@25%": 0.613671875, "calibration/coverage@30%": 0.703125, "calibration/coverage@5%": 0.0734375, "calibration/ece": 0.09547035939570313, "calibration/mean_confidence": 0.4757562031042969, "calibration/prompt_uniqueness": 0.859912109375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 668.0, "completions/max_terminated_length": 464.2, "completions/mean_length": 208.0927734375, "completions/mean_terminated_length": 207.96302185058593, "completions/min_length": 101.0, "completions/min_terminated_length": 101.0, "epoch": 0.608, "grad_norm": 0.0007230865303426981, "learning_rate": 1e-06, "loss": 0.0003, "num_tokens": 642575342.0, "reward": 0.9399636507034301, "reward_std": 0.07548893839120865, "rewards/accuracy_reward": 0.5287109375, "rewards/brier_reward": 0.8094612717628479, "rewards/confidence_uniqueness_reward": 0.9632078409194946, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.00239487262442708, "rewards/frontier_coverage_0": 0.13547399938106536, "rewards/frontier_coverage_1": 0.13547399938106536, "rewards/frontier_coverage_10": 0.13500653803348542, "rewards/frontier_coverage_15": 0.13492192924022675, "rewards/frontier_coverage_20": 0.1289721041917801, "rewards/frontier_coverage_25": 0.08149012476205826, "rewards/frontier_coverage_5": 0.13547399938106536, "rewards/frontier_ece_reward": 0.0059300497174263, "rewards/frontier_entropy_batch_reward": -0.13258121609687806, "signal/accuracy_reward/centered_abs_mean": 0.09466552734375, "signal/accuracy_reward/group_bin_occupancy": 0.1671875, "signal/accuracy_reward/group_std_mean": 0.12220577746629716, "signal/accuracy_reward/group_zero_std_frac": 0.6625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.047332763671875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.047332763671875, "signal/advantage_abs_mean": 0.0584713064134121, "signal/advantage_pre_scale_abs_mean": 0.0584713064134121, "signal/advantage_pre_scale_std": 0.09739241003990173, "signal/advantage_std": 0.09739241003990173, "signal/brier_reward/centered_abs_mean": 0.12433077991008759, "signal/brier_reward/group_bin_occupancy": 0.836328125, "signal/brier_reward/group_std_mean": 0.15874851047992705, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0124330785125494, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.0124330785125494, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.014638883247971534, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.860546875, "signal/confidence_uniqueness_reward/group_std_mean": 0.01877986006438732, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0014638883760198951, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014638883760198951, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_bin_occupancy": 0.125390625, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.0023107386427000166, "signal/frontier_aurc_reward/group_bin_occupancy": 0.734375, "signal/frontier_aurc_reward/group_std_mean": 0.0037692871410399675, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.8884234416182154e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.8884234416182154e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.1809740960597992, "signal/frontier_coverage_0/group_bin_occupancy": 0.86796875, "signal/frontier_coverage_0/group_std_mean": 0.22811082899570465, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0022621762473136187, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022621762473136187, "signal/frontier_coverage_1/centered_abs_mean": 0.1809740960597992, "signal/frontier_coverage_1/group_bin_occupancy": 0.86796875, "signal/frontier_coverage_1/group_std_mean": 0.22811082899570465, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022621762473136187, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022621762473136187, "signal/frontier_coverage_10/centered_abs_mean": 0.18020015954971313, "signal/frontier_coverage_10/group_bin_occupancy": 0.8671875, "signal/frontier_coverage_10/group_std_mean": 0.22716450095176696, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002252502040937543, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002252502040937543, "signal/frontier_coverage_15/centered_abs_mean": 0.18002530038356782, "signal/frontier_coverage_15/group_bin_occupancy": 0.8671875, "signal/frontier_coverage_15/group_std_mean": 0.22695587575435638, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0022503164131194354, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022503164131194354, "signal/frontier_coverage_20/centered_abs_mean": 0.17040481567382812, "signal/frontier_coverage_20/group_bin_occupancy": 0.8640625, "signal/frontier_coverage_20/group_std_mean": 0.21481127440929412, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0021300603169947863, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0021300603169947863, "signal/frontier_coverage_25/centered_abs_mean": 0.09835156053304672, "signal/frontier_coverage_25/group_bin_occupancy": 0.88125, "signal/frontier_coverage_25/group_std_mean": 0.12501538395881653, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012293945765122771, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012293945765122771, "signal/frontier_coverage_5/centered_abs_mean": 0.1809740960597992, "signal/frontier_coverage_5/group_bin_occupancy": 0.86796875, "signal/frontier_coverage_5/group_std_mean": 0.22811082899570465, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0022621762473136187, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022621762473136187, "signal/frontier_ece_reward/centered_abs_mean": 0.007257287390530109, "signal/frontier_ece_reward/group_bin_occupancy": 0.640234375, "signal/frontier_ece_reward/group_std_mean": 0.00914002489298582, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007257287506945431, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007257287506945431, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.1907802402973175, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.773828125, "signal/frontier_entropy_batch_reward/group_std_mean": 0.25298523604869844, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.019078024849295615, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.019078024849295615, "step": 190 }, { "calibration/aurc": 0.2700117873695714, "calibration/batch_distribution_entropy": 0.9849202351740087, "calibration/batch_entropy_100bins": 0.9480692672613837, "calibration/batch_entropy_10bins": 0.9849202351740087, "calibration/batch_entropy_50bins": 0.9754065036177344, "calibration/batch_uniqueness": 0.9661651611328125, "calibration/buffer_distribution_entropy": 0.9984497746065746, "calibration/buffer_entropy_100bins": 0.9940789054446035, "calibration/buffer_entropy_10bins": 0.9984497746065746, "calibration/buffer_entropy_50bins": 0.997180604698514, "calibration/confidence_entropy": 0.5025687742045375, "calibration/coverage@0%": 0.01484375, "calibration/coverage@1%": 0.01484375, "calibration/coverage@10%": 0.082421875, "calibration/coverage@15%": 0.191015625, "calibration/coverage@20%": 0.360546875, "calibration/coverage@25%": 0.4765625, "calibration/coverage@30%": 0.6328125, "calibration/coverage@5%": 0.026171875, "calibration/ece": 0.08137528389843751, "calibration/mean_confidence": 0.523429369834375, "calibration/prompt_uniqueness": 0.874169921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 522.2, "completions/max_terminated_length": 522.2, "completions/mean_length": 214.3845703125, "completions/mean_terminated_length": 214.3845703125, "completions/min_length": 103.0, "completions/min_terminated_length": 103.0, "epoch": 0.624, "grad_norm": 0.0008414575131610036, "learning_rate": 1e-06, "loss": -0.0, "num_tokens": 660114544.0, "reward": 0.9401260256767273, "reward_std": 0.08299153149127961, "rewards/accuracy_reward": 0.528125, "rewards/brier_reward": 0.805389142036438, "rewards/confidence_uniqueness_reward": 0.9662490844726562, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.002919661020860076, "rewards/frontier_coverage_0": 0.12498158812522889, "rewards/frontier_coverage_1": 0.12498158812522889, "rewards/frontier_coverage_10": 0.12457616329193115, "rewards/frontier_coverage_15": 0.12416773438453674, "rewards/frontier_coverage_20": 0.11661605983972549, "rewards/frontier_coverage_25": 0.07004784420132637, "rewards/frontier_coverage_5": 0.12498158812522889, "rewards/frontier_ece_reward": 0.004868039395660162, "rewards/frontier_entropy_batch_reward": -0.11680007576942444, "signal/accuracy_reward/centered_abs_mean": 0.099853515625, "signal/accuracy_reward/group_bin_occupancy": 0.173046875, "signal/accuracy_reward/group_std_mean": 0.13264059126377106, "signal/accuracy_reward/group_zero_std_frac": 0.615625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0499267578125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0499267578125, "signal/advantage_abs_mean": 0.0640070766210556, "signal/advantage_pre_scale_abs_mean": 0.0640070766210556, "signal/advantage_pre_scale_std": 0.1057273805141449, "signal/advantage_std": 0.1057273805141449, "signal/brier_reward/centered_abs_mean": 0.1260695680975914, "signal/brier_reward/group_bin_occupancy": 0.85234375, "signal/brier_reward/group_std_mean": 0.16242826581001282, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012606956996023655, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012606956996023655, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012650418281555175, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.878515625, "signal/confidence_uniqueness_reward/group_std_mean": 0.015894681215286255, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001265041856095195, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001265041856095195, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_bin_occupancy": 0.125, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.002833597734570503, "signal/frontier_aurc_reward/group_bin_occupancy": 0.696484375, "signal/frontier_aurc_reward/group_std_mean": 0.0049308110028505325, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.541997357388027e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.541997357388027e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.16888214647769928, "signal/frontier_coverage_0/group_bin_occupancy": 0.87421875, "signal/frontier_coverage_0/group_std_mean": 0.21781981885433196, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0021110267844051123, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021110267844051123, "signal/frontier_coverage_1/centered_abs_mean": 0.16888214647769928, "signal/frontier_coverage_1/group_bin_occupancy": 0.87421875, "signal/frontier_coverage_1/group_std_mean": 0.21781981885433196, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0021110267844051123, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021110267844051123, "signal/frontier_coverage_10/centered_abs_mean": 0.16817994117736818, "signal/frontier_coverage_10/group_bin_occupancy": 0.87421875, "signal/frontier_coverage_10/group_std_mean": 0.21693590581417083, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002102249301970005, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002102249301970005, "signal/frontier_coverage_15/centered_abs_mean": 0.16733984053134918, "signal/frontier_coverage_15/group_bin_occupancy": 0.873828125, "signal/frontier_coverage_15/group_std_mean": 0.21589226722717286, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0020917480811476707, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0020917480811476707, "signal/frontier_coverage_20/centered_abs_mean": 0.15284331440925597, "signal/frontier_coverage_20/group_bin_occupancy": 0.86328125, "signal/frontier_coverage_20/group_std_mean": 0.19766626060009002, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0019105415092781186, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0019105415092781186, "signal/frontier_coverage_25/centered_abs_mean": 0.08127258270978928, "signal/frontier_coverage_25/group_bin_occupancy": 0.8921875, "signal/frontier_coverage_25/group_std_mean": 0.10656125694513321, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010159073397517204, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010159073397517204, "signal/frontier_coverage_5/centered_abs_mean": 0.16888214647769928, "signal/frontier_coverage_5/group_bin_occupancy": 0.87421875, "signal/frontier_coverage_5/group_std_mean": 0.21781981885433196, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0021110267844051123, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021110267844051123, "signal/frontier_ece_reward/centered_abs_mean": 0.006312453839927912, "signal/frontier_ece_reward/group_bin_occupancy": 0.645703125, "signal/frontier_ece_reward/group_std_mean": 0.008178574219346046, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006312454002909363, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006312454002909363, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.18553299009799956, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.77421875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.2506150871515274, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01855330020189285, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01855330020189285, "step": 195 }, { "calibration/aurc": 0.28188014168873426, "calibration/batch_distribution_entropy": 0.9805733776248658, "calibration/batch_entropy_100bins": 0.9462408809338638, "calibration/batch_entropy_10bins": 0.9805733776248658, "calibration/batch_entropy_50bins": 0.9735475700074046, "calibration/batch_uniqueness": 0.96727294921875, "calibration/buffer_distribution_entropy": 0.9985629674566393, "calibration/buffer_entropy_100bins": 0.993793542280277, "calibration/buffer_entropy_10bins": 0.9985629674566393, "calibration/buffer_entropy_50bins": 0.9971571943470149, "calibration/confidence_entropy": 0.5155245961834228, "calibration/coverage@0%": 0.022265625, "calibration/coverage@1%": 0.022265625, "calibration/coverage@10%": 0.27109375, "calibration/coverage@15%": 0.342578125, "calibration/coverage@20%": 0.390234375, "calibration/coverage@25%": 0.479296875, "calibration/coverage@30%": 0.586328125, "calibration/coverage@5%": 0.169140625, "calibration/ece": 0.16515665708359378, "calibration/mean_confidence": 0.5311759464164062, "calibration/prompt_uniqueness": 0.88291015625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 764.2, "completions/max_terminated_length": 574.0, "completions/mean_length": 220.08583984375, "completions/mean_terminated_length": 219.95709533691405, "completions/min_length": 101.6, "completions/min_terminated_length": 101.6, "epoch": 0.64, "grad_norm": 0.002240754896774888, "learning_rate": 1e-06, "loss": 0.0007, "num_tokens": 677710911.0, "reward": 0.9547593593597412, "reward_std": 0.07518478035926819, "rewards/accuracy_reward": 0.568359375, "rewards/brier_reward": 0.8007413268089294, "rewards/confidence_uniqueness_reward": 0.965564739704132, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.002697795373387635, "rewards/frontier_coverage_0": 0.0924006424844265, "rewards/frontier_coverage_1": 0.0924006424844265, "rewards/frontier_coverage_10": 0.09219776839017868, "rewards/frontier_coverage_15": 0.09181699305772781, "rewards/frontier_coverage_20": 0.08537925407290459, "rewards/frontier_coverage_25": 0.0547153040766716, "rewards/frontier_coverage_5": 0.0924006424844265, "rewards/frontier_ece_reward": 0.004436677880585193, "rewards/frontier_entropy_batch_reward": -0.1387961506843567, "signal/accuracy_reward/centered_abs_mean": 0.07882080078125, "signal/accuracy_reward/group_bin_occupancy": 0.164453125, "signal/accuracy_reward/group_std_mean": 0.10743281841278077, "signal/accuracy_reward/group_zero_std_frac": 0.684375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.039410400390625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.039410400390625, "signal/advantage_abs_mean": 0.05716605037450791, "signal/advantage_pre_scale_abs_mean": 0.05716605037450791, "signal/advantage_pre_scale_std": 0.09739655405282974, "signal/advantage_std": 0.09739655405282974, "signal/brier_reward/centered_abs_mean": 0.1214315801858902, "signal/brier_reward/group_bin_occupancy": 0.8515625, "signal/brier_reward/group_std_mean": 0.15803540349006653, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012143158353865147, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012143158353865147, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012997383438050746, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.862109375, "signal/confidence_uniqueness_reward/group_std_mean": 0.017083115130662917, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012997383950278164, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012997383950278164, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_bin_occupancy": 0.12578125, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0027137193828821182, "signal/frontier_aurc_reward/group_bin_occupancy": 0.695703125, "signal/frontier_aurc_reward/group_std_mean": 0.004572696890681982, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.392149301362224e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.392149301362224e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.15220192968845367, "signal/frontier_coverage_0/group_bin_occupancy": 0.88125, "signal/frontier_coverage_0/group_std_mean": 0.19722159206867218, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0019025241024792194, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019025241024792194, "signal/frontier_coverage_1/centered_abs_mean": 0.15220192968845367, "signal/frontier_coverage_1/group_bin_occupancy": 0.88125, "signal/frontier_coverage_1/group_std_mean": 0.19722159206867218, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019025241024792194, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019025241024792194, "signal/frontier_coverage_10/centered_abs_mean": 0.15184370577335357, "signal/frontier_coverage_10/group_bin_occupancy": 0.88046875, "signal/frontier_coverage_10/group_std_mean": 0.19676703512668609, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018980463733896612, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018980463733896612, "signal/frontier_coverage_15/centered_abs_mean": 0.15081839263439178, "signal/frontier_coverage_15/group_bin_occupancy": 0.880078125, "signal/frontier_coverage_15/group_std_mean": 0.19544895887374877, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0018852299312129618, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018852299312129618, "signal/frontier_coverage_20/centered_abs_mean": 0.13319507688283921, "signal/frontier_coverage_20/group_bin_occupancy": 0.8703125, "signal/frontier_coverage_20/group_std_mean": 0.17282224893569947, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0016649385681375862, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016649385681375862, "signal/frontier_coverage_25/centered_abs_mean": 0.06962503343820572, "signal/frontier_coverage_25/group_bin_occupancy": 0.8953125, "signal/frontier_coverage_25/group_std_mean": 0.09113402217626572, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.000870312936604023, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.000870312936604023, "signal/frontier_coverage_5/centered_abs_mean": 0.15220192968845367, "signal/frontier_coverage_5/group_bin_occupancy": 0.88125, "signal/frontier_coverage_5/group_std_mean": 0.19722159206867218, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0019025241024792194, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019025241024792194, "signal/frontier_ece_reward/centered_abs_mean": 0.006119345035403967, "signal/frontier_ece_reward/group_bin_occupancy": 0.6296875, "signal/frontier_ece_reward/group_std_mean": 0.007958116941154003, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006119344965554774, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006119344965554774, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.19946256577968596, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.76875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.26395664513111117, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.019946256838738918, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.019946256838738918, "step": 200 }, { "epoch": 0.64, "eval_calibration/aurc": 0.45046525025940026, "eval_calibration/batch_distribution_entropy": 0.9294361395739166, "eval_calibration/batch_entropy_100bins": 0.7037637102944632, "eval_calibration/batch_entropy_10bins": 0.9294361395739166, "eval_calibration/batch_entropy_50bins": 0.7976394251687032, "eval_calibration/batch_uniqueness": 0.9111328125, "eval_calibration/buffer_distribution_entropy": 0.9986805045240452, "eval_calibration/buffer_entropy_100bins": 0.9934918532172449, "eval_calibration/buffer_entropy_10bins": 0.9986805045240452, "eval_calibration/buffer_entropy_50bins": 0.9971155639822349, "eval_calibration/confidence_entropy": 0.4882177446761135, "eval_calibration/coverage@0%": 0.0859375, "eval_calibration/coverage@1%": 0.0859375, "eval_calibration/coverage@10%": 0.1328125, "eval_calibration/coverage@15%": 0.1328125, "eval_calibration/coverage@20%": 0.140625, "eval_calibration/coverage@25%": 0.171875, "eval_calibration/coverage@30%": 0.1796875, "eval_calibration/coverage@5%": 0.0859375, "eval_calibration/ece": 0.231015625, "eval_calibration/mean_confidence": 0.483984375, "eval_calibration/prompt_uniqueness": 0.9111328125, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 421.25, "eval_completions/max_terminated_length": 421.25, "eval_completions/mean_length": 223.7101287841797, "eval_completions/mean_terminated_length": 223.7101287841797, "eval_completions/min_length": 122.5, "eval_completions/min_terminated_length": 122.5, "eval_loss": 0.0, "eval_num_tokens": 677710911.0, "eval_reward": 0.8325561285018921, "eval_reward_std": 0.23123450949788094, "eval_rewards/accuracy_reward": 0.421875, "eval_rewards/brier_reward": 0.7981202453374863, "eval_rewards/confidence_uniqueness_reward": 0.9072265625, "eval_rewards/format_reward": 1.0, "eval_rewards/frontier_aurc_reward": -0.003328263759613037, "eval_rewards/frontier_coverage_0": 0.183719702064991, "eval_rewards/frontier_coverage_1": 0.183719702064991, "eval_rewards/frontier_coverage_10": 0.1834387667477131, "eval_rewards/frontier_coverage_15": 0.18294651806354523, "eval_rewards/frontier_coverage_20": 0.16035383194684982, "eval_rewards/frontier_coverage_25": 0.0812362264841795, "eval_rewards/frontier_coverage_5": 0.183719702064991, "eval_rewards/frontier_ece_reward": 0.004539699875749648, "eval_rewards/frontier_entropy_batch_reward": -0.6381759643554688, "eval_runtime": 21.1259, "eval_samples_per_second": 23.668, "eval_signal/accuracy_reward/centered_abs_mean": 0.470703125, "eval_signal/accuracy_reward/group_bin_occupancy": 0.25, "eval_signal/accuracy_reward/group_std_mean": 0.4925759807229042, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2353515625, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2353515625, "eval_signal/advantage_abs_mean": 0.21354259178042412, "eval_signal/advantage_pre_scale_abs_mean": 0.21354259178042412, "eval_signal/advantage_pre_scale_std": 0.2286580204963684, "eval_signal/advantage_std": 0.2286580204963684, "eval_signal/brier_reward/centered_abs_mean": 0.17807075753808022, "eval_signal/brier_reward/group_bin_occupancy": 0.84375, "eval_signal/brier_reward/group_std_mean": 0.2285139560699463, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017807076685130596, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.017807076685130596, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0390625, "eval_signal/confidence_uniqueness_reward/group_bin_occupancy": 0.375, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.047283546067774296, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003906250116415322, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003906250116415322, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_bin_occupancy": 0.125, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.004127664549741894, "eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.640625, "eval_signal/frontier_aurc_reward/group_std_mean": 0.008040638058446348, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.1595807235571556e-05, "eval_signal/frontier_aurc_reward/weight": 0.012500000186264515, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.1595807235571556e-05, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.33542679250240326, "eval_signal/frontier_coverage_0/group_bin_occupancy": 0.9765625, "eval_signal/frontier_coverage_0/group_std_mean": 0.41075549274683, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.004192834836430848, "eval_signal/frontier_coverage_0/weight": 0.012500000186264515, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004192834836430848, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.33542679250240326, "eval_signal/frontier_coverage_1/group_bin_occupancy": 0.9765625, "eval_signal/frontier_coverage_1/group_std_mean": 0.41075549274683, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004192834836430848, "eval_signal/frontier_coverage_1/weight": 0.012500000186264515, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004192834836430848, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.33442478626966476, "eval_signal/frontier_coverage_10/group_bin_occupancy": 0.9765625, "eval_signal/frontier_coverage_10/group_std_mean": 0.4096386879682541, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0041803098283708096, "eval_signal/frontier_coverage_10/weight": 0.012500000186264515, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0041803098283708096, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.3335842937231064, "eval_signal/frontier_coverage_15/group_bin_occupancy": 0.9765625, "eval_signal/frontier_coverage_15/group_std_mean": 0.4086746945977211, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004169803811237216, "eval_signal/frontier_coverage_15/weight": 0.012500000186264515, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004169803811237216, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.28882090747356415, "eval_signal/frontier_coverage_20/group_bin_occupancy": 0.9609375, "eval_signal/frontier_coverage_20/group_std_mean": 0.3565327152609825, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00361026159953326, "eval_signal/frontier_coverage_20/weight": 0.012500000186264515, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00361026159953326, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.1293876338750124, "eval_signal/frontier_coverage_25/group_bin_occupancy": 0.9453125, "eval_signal/frontier_coverage_25/group_std_mean": 0.16918417811393738, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0016173454350791872, "eval_signal/frontier_coverage_25/weight": 0.012500000186264515, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016173454350791872, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.33542679250240326, "eval_signal/frontier_coverage_5/group_bin_occupancy": 0.9765625, "eval_signal/frontier_coverage_5/group_std_mean": 0.41075549274683, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004192834836430848, "eval_signal/frontier_coverage_5/weight": 0.012500000186264515, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004192834836430848, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.007051818422041833, "eval_signal/frontier_ece_reward/group_bin_occupancy": 0.8671875, "eval_signal/frontier_ece_reward/group_std_mean": 0.009225662797689438, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007051818392938003, "eval_signal/frontier_ece_reward/weight": 0.10000000149011612, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007051818392938003, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.332033634185791, "eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.25, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.3422466069459915, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0332033634185791, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0332033634185791, "eval_steps_per_second": 0.189, "step": 200 }, { "calibration/aurc": 0.41416409000016474, "calibration/batch_distribution_entropy": 0.9835108836589204, "calibration/batch_entropy_100bins": 0.9492298170911946, "calibration/batch_entropy_10bins": 0.9835108836589204, "calibration/batch_entropy_50bins": 0.9746778983389239, "calibration/batch_uniqueness": 0.964678955078125, "calibration/buffer_distribution_entropy": 0.9987585634745246, "calibration/buffer_entropy_100bins": 0.9932715162674421, "calibration/buffer_entropy_10bins": 0.9987585634745246, "calibration/buffer_entropy_50bins": 0.9970967811961777, "calibration/confidence_entropy": 0.5169030002642951, "calibration/coverage@0%": 0.00234375, "calibration/coverage@1%": 0.00234375, "calibration/coverage@10%": 0.00625, "calibration/coverage@15%": 0.022265625, "calibration/coverage@20%": 0.051953125, "calibration/coverage@25%": 0.156640625, "calibration/coverage@30%": 0.269140625, "calibration/coverage@5%": 0.00234375, "calibration/ece": 0.11185393470703127, "calibration/mean_confidence": 0.47490546869140626, "calibration/prompt_uniqueness": 0.87470703125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 788.8, "completions/max_terminated_length": 584.2, "completions/mean_length": 216.35341796875, "completions/mean_terminated_length": 216.2248046875, "completions/min_length": 97.6, "completions/min_terminated_length": 97.6, "epoch": 0.656, "grad_norm": 0.0008305140654556453, "learning_rate": 1e-06, "loss": 0.0007, "num_tokens": 694782914.0, "reward": 0.9230387210845947, "reward_std": 0.08519956022500992, "rewards/accuracy_reward": 0.50146484375, "rewards/brier_reward": 0.7805219888687134, "rewards/confidence_uniqueness_reward": 0.9658442378044129, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.0030296589247882366, "rewards/frontier_coverage_0": 0.11411752551794052, "rewards/frontier_coverage_1": 0.11411752551794052, "rewards/frontier_coverage_10": 0.113961161673069, "rewards/frontier_coverage_15": 0.11310399323701859, "rewards/frontier_coverage_20": 0.10050597786903381, "rewards/frontier_coverage_25": 0.05612687692046166, "rewards/frontier_coverage_5": 0.11411752551794052, "rewards/frontier_ece_reward": 0.0036422216799110174, "rewards/frontier_entropy_batch_reward": -0.11683483868837356, "signal/accuracy_reward/centered_abs_mean": 0.101885986328125, "signal/accuracy_reward/group_bin_occupancy": 0.1734375, "signal/accuracy_reward/group_std_mean": 0.1349509835243225, "signal/accuracy_reward/group_zero_std_frac": 0.6125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0509429931640625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0509429931640625, "signal/advantage_abs_mean": 0.06606777310371399, "signal/advantage_pre_scale_abs_mean": 0.06606777310371399, "signal/advantage_pre_scale_std": 0.10768669247627258, "signal/advantage_std": 0.10768669247627258, "signal/brier_reward/centered_abs_mean": 0.1326069116592407, "signal/brier_reward/group_bin_occupancy": 0.855859375, "signal/brier_reward/group_std_mean": 0.1697738140821457, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013260690867900849, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013260690867900849, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.013172254525125026, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.85703125, "signal/confidence_uniqueness_reward/group_std_mean": 0.017005473747849463, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013172254897654057, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013172254897654057, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_bin_occupancy": 0.125390625, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.002756138565018773, "signal/frontier_aurc_reward/group_bin_occupancy": 0.70078125, "signal/frontier_aurc_reward/group_std_mean": 0.004585301177576185, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.4451731698936784e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.4451731698936784e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.1743350952863693, "signal/frontier_coverage_0/group_bin_occupancy": 0.880859375, "signal/frontier_coverage_0/group_std_mean": 0.22367313802242278, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0021791885839775203, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021791885839775203, "signal/frontier_coverage_1/centered_abs_mean": 0.1743350952863693, "signal/frontier_coverage_1/group_bin_occupancy": 0.880859375, "signal/frontier_coverage_1/group_std_mean": 0.22367313802242278, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0021791885839775203, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021791885839775203, "signal/frontier_coverage_10/centered_abs_mean": 0.17374208867549895, "signal/frontier_coverage_10/group_bin_occupancy": 0.88125, "signal/frontier_coverage_10/group_std_mean": 0.22294133603572847, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0021717761643230914, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0021717761643230914, "signal/frontier_coverage_15/centered_abs_mean": 0.17231329381465912, "signal/frontier_coverage_15/group_bin_occupancy": 0.880078125, "signal/frontier_coverage_15/group_std_mean": 0.22113268971443176, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0021539161913096904, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0021539161913096904, "signal/frontier_coverage_20/centered_abs_mean": 0.15182446539402009, "signal/frontier_coverage_20/group_bin_occupancy": 0.8765625, "signal/frontier_coverage_20/group_std_mean": 0.195146906375885, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0018978057894855737, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0018978057894855737, "signal/frontier_coverage_25/centered_abs_mean": 0.07687772065401077, "signal/frontier_coverage_25/group_bin_occupancy": 0.902734375, "signal/frontier_coverage_25/group_std_mean": 0.09964980781078339, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.000960971531458199, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.000960971531458199, "signal/frontier_coverage_5/centered_abs_mean": 0.1743350952863693, "signal/frontier_coverage_5/group_bin_occupancy": 0.880859375, "signal/frontier_coverage_5/group_std_mean": 0.22367313802242278, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0021791885839775203, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021791885839775203, "signal/frontier_ece_reward/centered_abs_mean": 0.005466759670525789, "signal/frontier_ece_reward/group_bin_occupancy": 0.666015625, "signal/frontier_ece_reward/group_std_mean": 0.007180223613977433, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005466759903356433, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005466759903356433, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.18491021990776063, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.75234375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.2515652894973755, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.018491022288799286, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.018491022288799286, "step": 205 }, { "calibration/aurc": 0.30381564409048867, "calibration/batch_distribution_entropy": 0.9791813887040892, "calibration/batch_entropy_100bins": 0.9456414506810381, "calibration/batch_entropy_10bins": 0.9791813887040892, "calibration/batch_entropy_50bins": 0.970168505212247, "calibration/batch_uniqueness": 0.961700439453125, "calibration/buffer_distribution_entropy": 0.9989108611243351, "calibration/buffer_entropy_100bins": 0.9926973216876972, "calibration/buffer_entropy_10bins": 0.9989108611243351, "calibration/buffer_entropy_50bins": 0.9970219902732639, "calibration/confidence_entropy": 0.4907718157996378, "calibration/coverage@0%": 0.008203125, "calibration/coverage@1%": 0.008203125, "calibration/coverage@10%": 0.10078125, "calibration/coverage@15%": 0.197265625, "calibration/coverage@20%": 0.26953125, "calibration/coverage@25%": 0.36171875, "calibration/coverage@30%": 0.4375, "calibration/coverage@5%": 0.044921875, "calibration/ece": 0.13147094825039063, "calibration/mean_confidence": 0.4795633160871094, "calibration/prompt_uniqueness": 0.862939453125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 779.8, "completions/max_terminated_length": 566.2, "completions/mean_length": 213.07626953125, "completions/mean_terminated_length": 212.94696350097655, "completions/min_length": 101.2, "completions/min_terminated_length": 101.2, "epoch": 0.672, "grad_norm": 0.000700996839441359, "learning_rate": 1e-06, "loss": 0.0009, "num_tokens": 711878255.0, "reward": 0.9307149410247803, "reward_std": 0.07861108779907226, "rewards/accuracy_reward": 0.5162109375, "rewards/brier_reward": 0.7959470629692078, "rewards/confidence_uniqueness_reward": 0.961796760559082, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.002726683299988508, "rewards/frontier_coverage_0": 0.1381845772266388, "rewards/frontier_coverage_1": 0.1381845772266388, "rewards/frontier_coverage_10": 0.13760078251361846, "rewards/frontier_coverage_15": 0.13690231442451478, "rewards/frontier_coverage_20": 0.12530189156532287, "rewards/frontier_coverage_25": 0.07160148993134499, "rewards/frontier_coverage_5": 0.1381845772266388, "rewards/frontier_ece_reward": 0.004131518257781863, "rewards/frontier_entropy_batch_reward": -0.1456966444849968, "signal/accuracy_reward/centered_abs_mean": 0.09827880859375, "signal/accuracy_reward/group_bin_occupancy": 0.17109375, "signal/accuracy_reward/group_std_mean": 0.12960606515407563, "signal/accuracy_reward/group_zero_std_frac": 0.63125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.049139404296875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.049139404296875, "signal/advantage_abs_mean": 0.06056781709194183, "signal/advantage_pre_scale_abs_mean": 0.06056781709194183, "signal/advantage_pre_scale_std": 0.09958604127168655, "signal/advantage_std": 0.09958604127168655, "signal/brier_reward/centered_abs_mean": 0.13191191256046295, "signal/brier_reward/group_bin_occupancy": 0.82578125, "signal/brier_reward/group_std_mean": 0.16879949271678923, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013191192038357257, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013191192038357257, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0151775436475873, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.84765625, "signal/confidence_uniqueness_reward/group_std_mean": 0.01969280615448952, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00151775439735502, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00151775439735502, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_bin_occupancy": 0.125390625, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.0025179087184369563, "signal/frontier_aurc_reward/group_bin_occupancy": 0.713671875, "signal/frontier_aurc_reward/group_std_mean": 0.00425442517735064, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.147385905322153e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.147385905322153e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.18835416436195374, "signal/frontier_coverage_0/group_bin_occupancy": 0.852734375, "signal/frontier_coverage_0/group_std_mean": 0.239266437292099, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002354427147656679, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002354427147656679, "signal/frontier_coverage_1/centered_abs_mean": 0.18835416436195374, "signal/frontier_coverage_1/group_bin_occupancy": 0.852734375, "signal/frontier_coverage_1/group_std_mean": 0.239266437292099, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002354427147656679, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002354427147656679, "signal/frontier_coverage_10/centered_abs_mean": 0.18741922974586486, "signal/frontier_coverage_10/group_bin_occupancy": 0.853125, "signal/frontier_coverage_10/group_std_mean": 0.238098081946373, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002342740399762988, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002342740399762988, "signal/frontier_coverage_15/centered_abs_mean": 0.18593138456344604, "signal/frontier_coverage_15/group_bin_occupancy": 0.8546875, "signal/frontier_coverage_15/group_std_mean": 0.23624544441699982, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023241423536092044, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023241423536092044, "signal/frontier_coverage_20/centered_abs_mean": 0.16277650594711304, "signal/frontier_coverage_20/group_bin_occupancy": 0.848828125, "signal/frontier_coverage_20/group_std_mean": 0.20805872082710267, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0020347062963992357, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0020347062963992357, "signal/frontier_coverage_25/centered_abs_mean": 0.08195126354694367, "signal/frontier_coverage_25/group_bin_occupancy": 0.888671875, "signal/frontier_coverage_25/group_std_mean": 0.10580885410308838, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010243908269330858, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010243908269330858, "signal/frontier_coverage_5/centered_abs_mean": 0.18835416436195374, "signal/frontier_coverage_5/group_bin_occupancy": 0.852734375, "signal/frontier_coverage_5/group_std_mean": 0.239266437292099, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002354427147656679, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002354427147656679, "signal/frontier_ece_reward/centered_abs_mean": 0.005728235561400652, "signal/frontier_ece_reward/group_bin_occupancy": 0.650390625, "signal/frontier_ece_reward/group_std_mean": 0.00733137084171176, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005728235701099038, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005728235701099038, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.1957707315683365, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7546875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.2600939005613327, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01957707367837429, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01957707367837429, "step": 210 }, { "calibration/aurc": 0.35507882344852365, "calibration/batch_distribution_entropy": 0.9815242485981495, "calibration/batch_entropy_100bins": 0.9487245023983595, "calibration/batch_entropy_10bins": 0.9815242485981495, "calibration/batch_entropy_50bins": 0.9749153032528938, "calibration/batch_uniqueness": 0.9653167724609375, "calibration/buffer_distribution_entropy": 0.9989997128991707, "calibration/buffer_entropy_100bins": 0.9919129656798372, "calibration/buffer_entropy_10bins": 0.9989997128991707, "calibration/buffer_entropy_50bins": 0.99690645212335, "calibration/confidence_entropy": 0.5100705905072789, "calibration/coverage@0%": 0.001953125, "calibration/coverage@1%": 0.001953125, "calibration/coverage@10%": 0.08671875, "calibration/coverage@15%": 0.11640625, "calibration/coverage@20%": 0.165234375, "calibration/coverage@25%": 0.349609375, "calibration/coverage@30%": 0.53671875, "calibration/coverage@5%": 0.001953125, "calibration/ece": 0.13693945359140625, "calibration/mean_confidence": 0.48373966934374996, "calibration/prompt_uniqueness": 0.875537109375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 605.0, "completions/max_terminated_length": 605.0, "completions/mean_length": 214.97587890625, "completions/mean_terminated_length": 214.97587890625, "completions/min_length": 101.6, "completions/min_terminated_length": 101.6, "epoch": 0.688, "grad_norm": 0.0009223796660080552, "learning_rate": 1e-06, "loss": 0.0001, "num_tokens": 729033528.0, "reward": 0.9396338343620301, "reward_std": 0.07829283773899079, "rewards/accuracy_reward": 0.5333984375, "rewards/brier_reward": 0.7883359670639039, "rewards/confidence_uniqueness_reward": 0.9647911071777344, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.0029629127122461794, "rewards/frontier_coverage_0": 0.10885621532797814, "rewards/frontier_coverage_1": 0.10885621532797814, "rewards/frontier_coverage_10": 0.10867343470454216, "rewards/frontier_coverage_15": 0.10741532370448112, "rewards/frontier_coverage_20": 0.09064158499240875, "rewards/frontier_coverage_25": 0.05339468345046043, "rewards/frontier_coverage_5": 0.10885621532797814, "rewards/frontier_ece_reward": 0.0036856223829090594, "rewards/frontier_entropy_batch_reward": -0.11293288618326187, "signal/accuracy_reward/centered_abs_mean": 0.09931640625, "signal/accuracy_reward/group_bin_occupancy": 0.171875, "signal/accuracy_reward/group_std_mean": 0.1314438134431839, "signal/accuracy_reward/group_zero_std_frac": 0.625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.049658203125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.049658203125, "signal/advantage_abs_mean": 0.06019651964306831, "signal/advantage_pre_scale_abs_mean": 0.06019651964306831, "signal/advantage_pre_scale_std": 0.10152793973684311, "signal/advantage_std": 0.10152793973684311, "signal/brier_reward/centered_abs_mean": 0.12910507768392562, "signal/brier_reward/group_bin_occupancy": 0.839453125, "signal/brier_reward/group_std_mean": 0.16621757447719573, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012910507991909981, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012910507991909981, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.013120555877685547, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8921875, "signal/confidence_uniqueness_reward/group_std_mean": 0.016461556777358055, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001312055578455329, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001312055578455329, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_bin_occupancy": 0.125, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.002895374782383442, "signal/frontier_aurc_reward/group_bin_occupancy": 0.705859375, "signal/frontier_aurc_reward/group_std_mean": 0.0050206175073981285, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.6192184779793024e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.6192184779793024e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.17723233103752137, "signal/frontier_coverage_0/group_bin_occupancy": 0.862109375, "signal/frontier_coverage_0/group_std_mean": 0.22633326649665833, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0022154041100293396, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022154041100293396, "signal/frontier_coverage_1/centered_abs_mean": 0.17723233103752137, "signal/frontier_coverage_1/group_bin_occupancy": 0.862109375, "signal/frontier_coverage_1/group_std_mean": 0.22633326649665833, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022154041100293396, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022154041100293396, "signal/frontier_coverage_10/centered_abs_mean": 0.17641493380069734, "signal/frontier_coverage_10/group_bin_occupancy": 0.861328125, "signal/frontier_coverage_10/group_std_mean": 0.2253005772829056, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022051867563277483, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022051867563277483, "signal/frontier_coverage_15/centered_abs_mean": 0.17494137585163116, "signal/frontier_coverage_15/group_bin_occupancy": 0.861328125, "signal/frontier_coverage_15/group_std_mean": 0.2234204888343811, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0021867671981453895, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0021867671981453895, "signal/frontier_coverage_20/centered_abs_mean": 0.14532059729099273, "signal/frontier_coverage_20/group_bin_occupancy": 0.855859375, "signal/frontier_coverage_20/group_std_mean": 0.18619788587093353, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001816507545299828, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001816507545299828, "signal/frontier_coverage_25/centered_abs_mean": 0.07450771033763885, "signal/frontier_coverage_25/group_bin_occupancy": 0.9078125, "signal/frontier_coverage_25/group_std_mean": 0.09580143839120865, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009313463466241956, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009313463466241956, "signal/frontier_coverage_5/centered_abs_mean": 0.17723233103752137, "signal/frontier_coverage_5/group_bin_occupancy": 0.862109375, "signal/frontier_coverage_5/group_std_mean": 0.22633326649665833, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0022154041100293396, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022154041100293396, "signal/frontier_ece_reward/centered_abs_mean": 0.005490542016923427, "signal/frontier_ece_reward/group_bin_occupancy": 0.653515625, "signal/frontier_ece_reward/group_std_mean": 0.006997937150299549, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005490542040206492, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005490542040206492, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.17429947555065156, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7765625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.2336766630411148, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.017429948039352893, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.017429948039352893, "step": 215 }, { "calibration/aurc": 0.25269958125350817, "calibration/batch_distribution_entropy": 0.977598008234402, "calibration/batch_entropy_100bins": 0.9456781551973619, "calibration/batch_entropy_10bins": 0.977598008234402, "calibration/batch_entropy_50bins": 0.972391770560862, "calibration/batch_uniqueness": 0.964692861372854, "calibration/buffer_distribution_entropy": 0.9990104211633648, "calibration/buffer_entropy_100bins": 0.9910756654810251, "calibration/buffer_entropy_10bins": 0.9990104211633648, "calibration/buffer_entropy_50bins": 0.9967262526332157, "calibration/confidence_entropy": 0.47842673135918395, "calibration/coverage@0%": 0.006640625, "calibration/coverage@1%": 0.006640625, "calibration/coverage@10%": 0.081640625, "calibration/coverage@15%": 0.21738243028375734, "calibration/coverage@20%": 0.3651242967221135, "calibration/coverage@25%": 0.5608656433463797, "calibration/coverage@30%": 0.7155829562133073, "calibration/coverage@5%": 0.006640625, "calibration/ece": 0.08634725362059685, "calibration/mean_confidence": 0.5315407554914384, "calibration/prompt_uniqueness": 0.8584324800013008, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 998.4, "completions/max_terminated_length": 575.6, "completions/mean_length": 211.2060546875, "completions/mean_terminated_length": 210.94761962890624, "completions/min_length": 99.4, "completions/min_terminated_length": 99.4, "epoch": 0.704, "grad_norm": 0.000813114398624748, "learning_rate": 1e-06, "loss": 0.0008, "num_tokens": 746062422.0, "reward": 0.9449209213256836, "reward_std": 0.07712907642126084, "rewards/accuracy_reward": 0.54228515625, "rewards/brier_reward": 0.8041618227958679, "rewards/confidence_uniqueness_reward": 0.9660219669342041, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.0028705994598567488, "rewards/frontier_coverage_0": 0.11365007087588311, "rewards/frontier_coverage_1": 0.11365007087588311, "rewards/frontier_coverage_10": 0.11318954974412918, "rewards/frontier_coverage_15": 0.1123675525188446, "rewards/frontier_coverage_20": 0.09421491771936416, "rewards/frontier_coverage_25": 0.05829355418682099, "rewards/frontier_coverage_5": 0.11365007087588311, "rewards/frontier_ece_reward": 0.004210776835680008, "rewards/frontier_entropy_batch_reward": -0.12466455399990081, "signal/accuracy_reward/centered_abs_mean": 0.089410400390625, "signal/accuracy_reward/group_bin_occupancy": 0.16640625, "signal/accuracy_reward/group_std_mean": 0.1160278245806694, "signal/accuracy_reward/group_zero_std_frac": 0.66875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0447052001953125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0447052001953125, "signal/advantage_abs_mean": 0.06003868728876114, "signal/advantage_pre_scale_abs_mean": 0.06003868728876114, "signal/advantage_pre_scale_std": 0.10074197500944138, "signal/advantage_std": 0.10074197500944138, "signal/brier_reward/centered_abs_mean": 0.12351735383272171, "signal/brier_reward/group_bin_occupancy": 0.848046875, "signal/brier_reward/group_std_mean": 0.15841708183288575, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012351735681295394, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012351735681295394, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012723441608250141, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8484375, "signal/confidence_uniqueness_reward/group_std_mean": 0.01709325034171343, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012723441468551755, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012723441468551755, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_bin_occupancy": 0.126171875, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.0028855173382908105, "signal/frontier_aurc_reward/group_bin_occupancy": 0.71953125, "signal/frontier_aurc_reward/group_std_mean": 0.004696205072104931, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.606896862038411e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.606896862038411e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.15936529040336608, "signal/frontier_coverage_0/group_bin_occupancy": 0.86484375, "signal/frontier_coverage_0/group_std_mean": 0.20337989330291747, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00199206608813256, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00199206608813256, "signal/frontier_coverage_1/centered_abs_mean": 0.15936529040336608, "signal/frontier_coverage_1/group_bin_occupancy": 0.86484375, "signal/frontier_coverage_1/group_std_mean": 0.20337989330291747, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00199206608813256, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00199206608813256, "signal/frontier_coverage_10/centered_abs_mean": 0.15849925875663756, "signal/frontier_coverage_10/group_bin_occupancy": 0.864453125, "signal/frontier_coverage_10/group_std_mean": 0.2023030012845993, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019812406972050667, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019812406972050667, "signal/frontier_coverage_15/centered_abs_mean": 0.15680376291275025, "signal/frontier_coverage_15/group_bin_occupancy": 0.863671875, "signal/frontier_coverage_15/group_std_mean": 0.2001950114965439, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001960047124885023, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001960047124885023, "signal/frontier_coverage_20/centered_abs_mean": 0.12259230017662048, "signal/frontier_coverage_20/group_bin_occupancy": 0.848046875, "signal/frontier_coverage_20/group_std_mean": 0.15736171305179597, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0015324037754908203, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015324037754908203, "signal/frontier_coverage_25/centered_abs_mean": 0.06701800152659416, "signal/frontier_coverage_25/group_bin_occupancy": 0.911328125, "signal/frontier_coverage_25/group_std_mean": 0.08619940429925918, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0008377250749617815, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008377250749617815, "signal/frontier_coverage_5/centered_abs_mean": 0.15936529040336608, "signal/frontier_coverage_5/group_bin_occupancy": 0.86484375, "signal/frontier_coverage_5/group_std_mean": 0.20337989330291747, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00199206608813256, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00199206608813256, "signal/frontier_ece_reward/centered_abs_mean": 0.0054263660684227945, "signal/frontier_ece_reward/group_bin_occupancy": 0.649609375, "signal/frontier_ece_reward/group_std_mean": 0.006871442683041096, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005426366347819567, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005426366347819567, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.17846384644508362, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.76328125, "signal/frontier_entropy_batch_reward/group_std_mean": 0.23761946856975555, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01784638427197933, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01784638427197933, "step": 220 }, { "calibration/aurc": 0.24251946895628765, "calibration/batch_distribution_entropy": 0.9893663529287997, "calibration/batch_entropy_100bins": 0.9515162266162939, "calibration/batch_entropy_10bins": 0.9893663529287997, "calibration/batch_entropy_50bins": 0.9806845012920032, "calibration/batch_uniqueness": 0.9671478271484375, "calibration/buffer_distribution_entropy": 0.9989959337433355, "calibration/buffer_entropy_100bins": 0.9900516903881215, "calibration/buffer_entropy_10bins": 0.9989959337433355, "calibration/buffer_entropy_50bins": 0.9964595474824691, "calibration/confidence_entropy": 0.49330079316439557, "calibration/coverage@0%": 0.040234375, "calibration/coverage@1%": 0.05234375, "calibration/coverage@10%": 0.209765625, "calibration/coverage@15%": 0.328515625, "calibration/coverage@20%": 0.45078125, "calibration/coverage@25%": 0.54765625, "calibration/coverage@30%": 0.640625, "calibration/coverage@5%": 0.1265625, "calibration/ece": 0.12774077019609376, "calibration/mean_confidence": 0.5107396985539063, "calibration/prompt_uniqueness": 0.870361328125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 466.6, "completions/max_terminated_length": 466.6, "completions/mean_length": 209.7314453125, "completions/mean_terminated_length": 209.7314453125, "completions/min_length": 95.4, "completions/min_terminated_length": 95.4, "epoch": 0.72, "grad_norm": 0.0009440298308618367, "learning_rate": 1e-06, "loss": 0.0006, "num_tokens": 763219928.0, "reward": 0.9544408559799195, "reward_std": 0.07761229127645493, "rewards/accuracy_reward": 0.56044921875, "rewards/brier_reward": 0.8082213044166565, "rewards/confidence_uniqueness_reward": 0.9663070678710938, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.0023990374989807605, "rewards/frontier_coverage_0": 0.10608717054128647, "rewards/frontier_coverage_1": 0.10608717054128647, "rewards/frontier_coverage_10": 0.10574176013469697, "rewards/frontier_coverage_15": 0.10523617118597031, "rewards/frontier_coverage_20": 0.08412261456251144, "rewards/frontier_coverage_25": 0.05517871528863907, "rewards/frontier_coverage_5": 0.10608717054128647, "rewards/frontier_ece_reward": 0.00386471445672214, "rewards/frontier_entropy_batch_reward": -0.119498211145401, "signal/accuracy_reward/centered_abs_mean": 0.094354248046875, "signal/accuracy_reward/group_bin_occupancy": 0.171484375, "signal/accuracy_reward/group_std_mean": 0.12810063362121582, "signal/accuracy_reward/group_zero_std_frac": 0.628125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0471771240234375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0471771240234375, "signal/advantage_abs_mean": 0.05871965810656547, "signal/advantage_pre_scale_abs_mean": 0.05871965810656547, "signal/advantage_pre_scale_std": 0.09942338019609451, "signal/advantage_std": 0.09942338019609451, "signal/brier_reward/centered_abs_mean": 0.11699345856904983, "signal/brier_reward/group_bin_occupancy": 0.856640625, "signal/brier_reward/group_std_mean": 0.150360769033432, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011699345521628856, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011699345521628856, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012291359901428222, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8765625, "signal/confidence_uniqueness_reward/group_std_mean": 0.015424015931785107, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012291359947994352, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012291359947994352, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_bin_occupancy": 0.125, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0023271431447938083, "signal/frontier_aurc_reward/group_bin_occupancy": 0.716796875, "signal/frontier_aurc_reward/group_std_mean": 0.003916347119957209, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.908928909164388e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.908928909164388e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.16398767232894898, "signal/frontier_coverage_0/group_bin_occupancy": 0.869140625, "signal/frontier_coverage_0/group_std_mean": 0.2096702426671982, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002049846015870571, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002049846015870571, "signal/frontier_coverage_1/centered_abs_mean": 0.16398767232894898, "signal/frontier_coverage_1/group_bin_occupancy": 0.869140625, "signal/frontier_coverage_1/group_std_mean": 0.2096702426671982, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002049846015870571, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002049846015870571, "signal/frontier_coverage_10/centered_abs_mean": 0.16326985955238343, "signal/frontier_coverage_10/group_bin_occupancy": 0.869921875, "signal/frontier_coverage_10/group_std_mean": 0.20876802504062653, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0020408732816576958, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020408732816576958, "signal/frontier_coverage_15/centered_abs_mean": 0.1617922306060791, "signal/frontier_coverage_15/group_bin_occupancy": 0.869921875, "signal/frontier_coverage_15/group_std_mean": 0.20689028203487397, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002022402756847441, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002022402756847441, "signal/frontier_coverage_20/centered_abs_mean": 0.1167424589395523, "signal/frontier_coverage_20/group_bin_occupancy": 0.858984375, "signal/frontier_coverage_20/group_std_mean": 0.15021034181118012, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014592807507142424, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014592807507142424, "signal/frontier_coverage_25/centered_abs_mean": 0.06388061791658402, "signal/frontier_coverage_25/group_bin_occupancy": 0.919921875, "signal/frontier_coverage_25/group_std_mean": 0.08200441002845764, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007985077565535903, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007985077565535903, "signal/frontier_coverage_5/centered_abs_mean": 0.16398767232894898, "signal/frontier_coverage_5/group_bin_occupancy": 0.869140625, "signal/frontier_coverage_5/group_std_mean": 0.2096702426671982, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002049846015870571, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002049846015870571, "signal/frontier_ece_reward/centered_abs_mean": 0.004942002054303884, "signal/frontier_ece_reward/group_bin_occupancy": 0.648828125, "signal/frontier_ece_reward/group_std_mean": 0.006254712212830782, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0004942002124153078, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0004942002124153078, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.1822981506586075, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.770703125, "signal/frontier_entropy_batch_reward/group_std_mean": 0.24075571000576018, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.018229815922677518, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.018229815922677518, "step": 225 }, { "calibration/aurc": 0.265978066281768, "calibration/batch_distribution_entropy": 0.9789357360503427, "calibration/batch_entropy_100bins": 0.9431321978969809, "calibration/batch_entropy_10bins": 0.9789357360503427, "calibration/batch_entropy_50bins": 0.9697579699878605, "calibration/batch_uniqueness": 0.9652801513671875, "calibration/buffer_distribution_entropy": 0.9990033919158412, "calibration/buffer_entropy_100bins": 0.9889349207161073, "calibration/buffer_entropy_10bins": 0.9990033919158412, "calibration/buffer_entropy_50bins": 0.9962503665279716, "calibration/confidence_entropy": 0.48441087660579196, "calibration/coverage@0%": 0.012109375, "calibration/coverage@1%": 0.012109375, "calibration/coverage@10%": 0.071875, "calibration/coverage@15%": 0.2671875, "calibration/coverage@20%": 0.358984375, "calibration/coverage@25%": 0.51796875, "calibration/coverage@30%": 0.63671875, "calibration/coverage@5%": 0.030859375, "calibration/ece": 0.11656946337539065, "calibration/mean_confidence": 0.5445783087582032, "calibration/prompt_uniqueness": 0.864501953125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 652.8, "completions/max_terminated_length": 652.8, "completions/mean_length": 206.99716796875, "completions/mean_terminated_length": 206.99716796875, "completions/min_length": 96.0, "completions/min_terminated_length": 96.0, "epoch": 0.736, "grad_norm": 0.0008532933425158262, "learning_rate": 1e-06, "loss": 0.0001, "num_tokens": 780279163.0, "reward": 0.9536020278930664, "reward_std": 0.07359256446361542, "rewards/accuracy_reward": 0.55859375, "rewards/brier_reward": 0.7990551352500915, "rewards/confidence_uniqueness_reward": 0.9658401489257813, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.002780623361468315, "rewards/frontier_coverage_0": 0.10266576707363129, "rewards/frontier_coverage_1": 0.10266576707363129, "rewards/frontier_coverage_10": 0.10229146480560303, "rewards/frontier_coverage_15": 0.10120062232017517, "rewards/frontier_coverage_20": 0.08240518420934677, "rewards/frontier_coverage_25": 0.055143621563911435, "rewards/frontier_coverage_5": 0.10260389745235443, "rewards/frontier_ece_reward": 0.003426346043124795, "rewards/frontier_entropy_batch_reward": -0.10604460686445236, "signal/accuracy_reward/centered_abs_mean": 0.08675537109375, "signal/accuracy_reward/group_bin_occupancy": 0.166015625, "signal/accuracy_reward/group_std_mean": 0.11478563249111176, "signal/accuracy_reward/group_zero_std_frac": 0.671875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.043377685546875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.043377685546875, "signal/advantage_abs_mean": 0.056449040025472644, "signal/advantage_pre_scale_abs_mean": 0.056449040025472644, "signal/advantage_pre_scale_std": 0.09533527195453644, "signal/advantage_std": 0.09533527195453644, "signal/brier_reward/centered_abs_mean": 0.12223577499389648, "signal/brier_reward/group_bin_occupancy": 0.841015625, "signal/brier_reward/group_std_mean": 0.15614095330238342, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012223577871918679, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012223577871918679, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012729287147521973, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.867578125, "signal/confidence_uniqueness_reward/group_std_mean": 0.016052869893610478, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012729287147521973, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012729287147521973, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_bin_occupancy": 0.125, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0026509141782298682, "signal/frontier_aurc_reward/group_bin_occupancy": 0.72109375, "signal/frontier_aurc_reward/group_std_mean": 0.004276033490896225, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.313642882858403e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.313642882858403e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.1618587166070938, "signal/frontier_coverage_0/group_bin_occupancy": 0.865234375, "signal/frontier_coverage_0/group_std_mean": 0.20602332055568695, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002023234078660607, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002023234078660607, "signal/frontier_coverage_1/centered_abs_mean": 0.1618587166070938, "signal/frontier_coverage_1/group_bin_occupancy": 0.865234375, "signal/frontier_coverage_1/group_std_mean": 0.20602332055568695, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002023234078660607, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002023234078660607, "signal/frontier_coverage_10/centered_abs_mean": 0.16095443964004516, "signal/frontier_coverage_10/group_bin_occupancy": 0.865234375, "signal/frontier_coverage_10/group_std_mean": 0.2048912912607193, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0020119305001571773, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020119305001571773, "signal/frontier_coverage_15/centered_abs_mean": 0.15913594663143157, "signal/frontier_coverage_15/group_bin_occupancy": 0.865234375, "signal/frontier_coverage_15/group_std_mean": 0.2025930851697922, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0019891994539648294, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0019891994539648294, "signal/frontier_coverage_20/centered_abs_mean": 0.11051186323165893, "signal/frontier_coverage_20/group_bin_occupancy": 0.865234375, "signal/frontier_coverage_20/group_std_mean": 0.14187619388103484, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013813983183354138, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013813983183354138, "signal/frontier_coverage_25/centered_abs_mean": 0.06287488490343093, "signal/frontier_coverage_25/group_bin_occupancy": 0.921484375, "signal/frontier_coverage_25/group_std_mean": 0.08044356256723403, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007859360543079674, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007859360543079674, "signal/frontier_coverage_5/centered_abs_mean": 0.16175731718540193, "signal/frontier_coverage_5/group_bin_occupancy": 0.865625, "signal/frontier_coverage_5/group_std_mean": 0.20589107573032378, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00202196657191962, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00202196657191962, "signal/frontier_ece_reward/centered_abs_mean": 0.004867816995829344, "signal/frontier_ece_reward/group_bin_occupancy": 0.647265625, "signal/frontier_ece_reward/group_std_mean": 0.006159187015146017, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00048678170423954725, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00048678170423954725, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.16900931298732758, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.761328125, "signal/frontier_entropy_batch_reward/group_std_mean": 0.23038658797740935, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.016900931484997272, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.016900931484997272, "step": 230 }, { "calibration/aurc": 0.28684687224873423, "calibration/batch_distribution_entropy": 0.9762817415979625, "calibration/batch_entropy_100bins": 0.9470552678321782, "calibration/batch_entropy_10bins": 0.9762817415979625, "calibration/batch_entropy_50bins": 0.9694219542495217, "calibration/batch_uniqueness": 0.9615577406598019, "calibration/buffer_distribution_entropy": 0.9989136260467595, "calibration/buffer_entropy_100bins": 0.9878533321218999, "calibration/buffer_entropy_10bins": 0.9989136260467595, "calibration/buffer_entropy_50bins": 0.995948725002774, "calibration/confidence_entropy": 0.4667618109383035, "calibration/coverage@0%": 0.009766389432485322, "calibration/coverage@1%": 0.009766389432485322, "calibration/coverage@10%": 0.11367263943248532, "calibration/coverage@15%": 0.20313723091976515, "calibration/coverage@20%": 0.3556522137964775, "calibration/coverage@25%": 0.4596180895303327, "calibration/coverage@30%": 0.5514562438845401, "calibration/coverage@5%": 0.009766389432485322, "calibration/ece": 0.1238677625992218, "calibration/mean_confidence": 0.46819689665010245, "calibration/prompt_uniqueness": 0.8621210104220864, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 685.6, "completions/max_terminated_length": 473.4, "completions/mean_length": 205.50390625, "completions/mean_terminated_length": 205.37394104003906, "completions/min_length": 91.8, "completions/min_terminated_length": 91.8, "epoch": 0.752, "grad_norm": 0.0009124129428528249, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 797610723.0, "reward": 0.951321005821228, "reward_std": 0.07593754529953003, "rewards/accuracy_reward": 0.56328125, "rewards/brier_reward": 0.794294559955597, "rewards/confidence_uniqueness_reward": 0.9645552635192871, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.0026896405033767223, "rewards/frontier_coverage_0": 0.09672394786030054, "rewards/frontier_coverage_1": 0.09672394786030054, "rewards/frontier_coverage_10": 0.09659410417079925, "rewards/frontier_coverage_15": 0.09543101899325848, "rewards/frontier_coverage_20": 0.0684605619404465, "rewards/frontier_coverage_25": 0.04791465476155281, "rewards/frontier_coverage_5": 0.09664845261722803, "rewards/frontier_ece_reward": 0.0033146409783512353, "rewards/frontier_entropy_batch_reward": -0.1393482729792595, "signal/accuracy_reward/centered_abs_mean": 0.0828857421875, "signal/accuracy_reward/group_bin_occupancy": 0.16796875, "signal/accuracy_reward/group_std_mean": 0.1134518638253212, "signal/accuracy_reward/group_zero_std_frac": 0.65625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04144287109375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04144287109375, "signal/advantage_abs_mean": 0.057659880816936494, "signal/advantage_pre_scale_abs_mean": 0.057659880816936494, "signal/advantage_pre_scale_std": 0.09769354313611985, "signal/advantage_std": 0.09769354313611985, "signal/brier_reward/centered_abs_mean": 0.11685173511505127, "signal/brier_reward/group_bin_occupancy": 0.8453125, "signal/brier_reward/group_std_mean": 0.15071351826190948, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011685173958539963, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011685173958539963, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.013762599974870681, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.87109375, "signal/confidence_uniqueness_reward/group_std_mean": 0.017685002461075783, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013762600487098099, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013762600487098099, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_bin_occupancy": 0.125390625, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.002494478039443493, "signal/frontier_aurc_reward/group_bin_occupancy": 0.71171875, "signal/frontier_aurc_reward/group_std_mean": 0.0040936945006251335, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.118097665719688e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.118097665719688e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.1533576190471649, "signal/frontier_coverage_0/group_bin_occupancy": 0.87265625, "signal/frontier_coverage_0/group_std_mean": 0.19774354100227357, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00191697021946311, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00191697021946311, "signal/frontier_coverage_1/centered_abs_mean": 0.1533576190471649, "signal/frontier_coverage_1/group_bin_occupancy": 0.87265625, "signal/frontier_coverage_1/group_std_mean": 0.19774354100227357, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00191697021946311, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00191697021946311, "signal/frontier_coverage_10/centered_abs_mean": 0.15258175432682036, "signal/frontier_coverage_10/group_bin_occupancy": 0.871484375, "signal/frontier_coverage_10/group_std_mean": 0.19675518870353698, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019072720315307379, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019072720315307379, "signal/frontier_coverage_15/centered_abs_mean": 0.15079548060894013, "signal/frontier_coverage_15/group_bin_occupancy": 0.8703125, "signal/frontier_coverage_15/group_std_mean": 0.19448258876800537, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0018849435495212675, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018849435495212675, "signal/frontier_coverage_20/centered_abs_mean": 0.10275738835334777, "signal/frontier_coverage_20/group_bin_occupancy": 0.86328125, "signal/frontier_coverage_20/group_std_mean": 0.13350152522325515, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0012844673823565246, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012844673823565246, "signal/frontier_coverage_25/centered_abs_mean": 0.060180126875638965, "signal/frontier_coverage_25/group_bin_occupancy": 0.918359375, "signal/frontier_coverage_25/group_std_mean": 0.07750741690397263, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007522516185417772, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007522516185417772, "signal/frontier_coverage_5/centered_abs_mean": 0.15328127443790435, "signal/frontier_coverage_5/group_bin_occupancy": 0.87265625, "signal/frontier_coverage_5/group_std_mean": 0.19764436781406403, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0019160159630700947, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019160159630700947, "signal/frontier_ece_reward/centered_abs_mean": 0.004672563914209604, "signal/frontier_ece_reward/group_bin_occupancy": 0.64765625, "signal/frontier_ece_reward/group_std_mean": 0.005903707630932331, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0004672564100474119, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0004672564100474119, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2062768131494522, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.739453125, "signal/frontier_entropy_batch_reward/group_std_mean": 0.2728250831365585, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02062768116593361, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02062768116593361, "step": 235 }, { "calibration/aurc": 0.27721551684829293, "calibration/batch_distribution_entropy": 0.9814844594182706, "calibration/batch_entropy_100bins": 0.9523715157977393, "calibration/batch_entropy_10bins": 0.9814844594182706, "calibration/batch_entropy_50bins": 0.9751030931904451, "calibration/batch_uniqueness": 0.9663785107122871, "calibration/buffer_distribution_entropy": 0.9989660116076141, "calibration/buffer_entropy_100bins": 0.9868567401278003, "calibration/buffer_entropy_10bins": 0.9989660116076141, "calibration/buffer_entropy_50bins": 0.9958389798404077, "calibration/confidence_entropy": 0.5004450542923582, "calibration/coverage@0%": 0.07227632705479452, "calibration/coverage@1%": 0.10938570205479452, "calibration/coverage@10%": 0.21955647627201563, "calibration/coverage@15%": 0.2676087022994129, "calibration/coverage@20%": 0.33362891389432486, "calibration/coverage@25%": 0.4402993517612524, "calibration/coverage@30%": 0.5215891022504893, "calibration/coverage@5%": 0.15978091364970645, "calibration/ece": 0.16065055711412365, "calibration/mean_confidence": 0.48663652354730924, "calibration/prompt_uniqueness": 0.880144990569719, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 733.4, "completions/max_terminated_length": 535.2, "completions/mean_length": 210.1384765625, "completions/mean_terminated_length": 210.0097869873047, "completions/min_length": 94.4, "completions/min_terminated_length": 94.4, "epoch": 0.768, "grad_norm": 0.0009373911889269948, "learning_rate": 1e-06, "loss": 0.0012, "num_tokens": 814695245.0, "reward": 0.9313406467437744, "reward_std": 0.07817895561456681, "rewards/accuracy_reward": 0.514453125, "rewards/brier_reward": 0.8032928228378295, "rewards/confidence_uniqueness_reward": 0.9653749227523803, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.002717023529112339, "rewards/frontier_coverage_0": 0.1328577607870102, "rewards/frontier_coverage_1": 0.1328577607870102, "rewards/frontier_coverage_10": 0.13257997035980223, "rewards/frontier_coverage_15": 0.13016380369663239, "rewards/frontier_coverage_20": 0.09398490190505981, "rewards/frontier_coverage_25": 0.057968994975090025, "rewards/frontier_coverage_5": 0.1328577607870102, "rewards/frontier_ece_reward": 0.003562742657959461, "rewards/frontier_entropy_batch_reward": -0.13192067593336104, "signal/accuracy_reward/centered_abs_mean": 0.08997802734375, "signal/accuracy_reward/group_bin_occupancy": 0.169921875, "signal/accuracy_reward/group_std_mean": 0.12250371724367141, "signal/accuracy_reward/group_zero_std_frac": 0.640625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.044989013671875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.044989013671875, "signal/advantage_abs_mean": 0.0592110738158226, "signal/advantage_pre_scale_abs_mean": 0.0592110738158226, "signal/advantage_pre_scale_std": 0.10076282024383545, "signal/advantage_std": 0.10076282024383545, "signal/brier_reward/centered_abs_mean": 0.1197770431637764, "signal/brier_reward/group_bin_occupancy": 0.852734375, "signal/brier_reward/group_std_mean": 0.15326233208179474, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011977704800665378, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011977704800665378, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012569081410765648, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.87578125, "signal/confidence_uniqueness_reward/group_std_mean": 0.016215594485402107, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012569081503897906, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012569081503897906, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_bin_occupancy": 0.125390625, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.002442924352362752, "signal/frontier_aurc_reward/group_bin_occupancy": 0.715234375, "signal/frontier_aurc_reward/group_std_mean": 0.004164928989484906, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.053655600524507e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.053655600524507e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.1612669676542282, "signal/frontier_coverage_0/group_bin_occupancy": 0.871875, "signal/frontier_coverage_0/group_std_mean": 0.2072072833776474, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0020158371888101103, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0020158371888101103, "signal/frontier_coverage_1/centered_abs_mean": 0.1612669676542282, "signal/frontier_coverage_1/group_bin_occupancy": 0.871875, "signal/frontier_coverage_1/group_std_mean": 0.2072072833776474, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0020158371888101103, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020158371888101103, "signal/frontier_coverage_10/centered_abs_mean": 0.16074672639369963, "signal/frontier_coverage_10/group_bin_occupancy": 0.871484375, "signal/frontier_coverage_10/group_std_mean": 0.20652774572372437, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0020093340426683426, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020093340426683426, "signal/frontier_coverage_15/centered_abs_mean": 0.15720563530921935, "signal/frontier_coverage_15/group_bin_occupancy": 0.870703125, "signal/frontier_coverage_15/group_std_mean": 0.20198263525962828, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001965070399455726, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001965070399455726, "signal/frontier_coverage_20/centered_abs_mean": 0.10369997471570969, "signal/frontier_coverage_20/group_bin_occupancy": 0.866796875, "signal/frontier_coverage_20/group_std_mean": 0.13390834033489227, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0012962497072294354, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012962497072294354, "signal/frontier_coverage_25/centered_abs_mean": 0.06143885999917984, "signal/frontier_coverage_25/group_bin_occupancy": 0.928515625, "signal/frontier_coverage_25/group_std_mean": 0.07899406105279923, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007679857430048287, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007679857430048287, "signal/frontier_coverage_5/centered_abs_mean": 0.1612669676542282, "signal/frontier_coverage_5/group_bin_occupancy": 0.871875, "signal/frontier_coverage_5/group_std_mean": 0.2072072833776474, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0020158371888101103, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020158371888101103, "signal/frontier_ece_reward/centered_abs_mean": 0.00437048701569438, "signal/frontier_ece_reward/group_bin_occupancy": 0.662109375, "signal/frontier_ece_reward/group_std_mean": 0.005594444740563631, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.000437048717867583, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.000437048717867583, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.18786839842796327, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.761328125, "signal/frontier_entropy_batch_reward/group_std_mean": 0.2498662382364273, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01878684014081955, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01878684014081955, "step": 240 }, { "calibration/aurc": 0.3254199114145034, "calibration/batch_distribution_entropy": 0.9789490119022005, "calibration/batch_entropy_100bins": 0.9479515107725032, "calibration/batch_entropy_10bins": 0.9789490119022005, "calibration/batch_entropy_50bins": 0.9707285129333456, "calibration/batch_uniqueness": 0.9646381132895101, "calibration/buffer_distribution_entropy": 0.9988574494603905, "calibration/buffer_entropy_100bins": 0.9858469419972238, "calibration/buffer_entropy_10bins": 0.9988574494603905, "calibration/buffer_entropy_50bins": 0.995701699935891, "calibration/confidence_entropy": 0.4700687621350797, "calibration/coverage@0%": 0.041083026960784315, "calibration/coverage@1%": 0.09655177696078432, "calibration/coverage@10%": 0.21512867647058825, "calibration/coverage@15%": 0.2503216911764706, "calibration/coverage@20%": 0.2843673406862745, "calibration/coverage@25%": 0.3223023897058824, "calibration/coverage@30%": 0.4446936274509804, "calibration/coverage@5%": 0.147359068627451, "calibration/ece": 0.154079998576708, "calibration/mean_confidence": 0.5222582954639936, "calibration/prompt_uniqueness": 0.8567335069444445, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000390625, "completions/max_length": 1005.2, "completions/max_terminated_length": 677.6, "completions/mean_length": 206.58896484375, "completions/mean_terminated_length": 206.06998596191406, "completions/min_length": 96.0, "completions/min_terminated_length": 96.0, "epoch": 0.784, "grad_norm": 0.0008056263905018568, "learning_rate": 1e-06, "loss": 0.0009, "num_tokens": 831985084.0, "reward": 0.9469256639480591, "reward_std": 0.0791924849152565, "rewards/accuracy_reward": 0.5556640625, "rewards/brier_reward": 0.785396134853363, "rewards/confidence_uniqueness_reward": 0.9655014038085937, "rewards/format_reward": 0.99951171875, "rewards/frontier_aurc_reward": -0.0030867070890963078, "rewards/frontier_coverage_0": 0.08916651010513306, "rewards/frontier_coverage_1": 0.08916651010513306, "rewards/frontier_coverage_10": 0.08891836106777191, "rewards/frontier_coverage_15": 0.08784883618354797, "rewards/frontier_coverage_20": 0.0627571128308773, "rewards/frontier_coverage_25": 0.04613062590360641, "rewards/frontier_coverage_5": 0.08916651010513306, "rewards/frontier_ece_reward": 0.0026697968831285836, "rewards/frontier_entropy_batch_reward": -0.1289479538798332, "signal/accuracy_reward/centered_abs_mean": 0.09161376953125, "signal/accuracy_reward/group_bin_occupancy": 0.170703125, "signal/accuracy_reward/group_std_mean": 0.12455501407384872, "signal/accuracy_reward/group_zero_std_frac": 0.634375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.045806884765625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.045806884765625, "signal/advantage_abs_mean": 0.05967723429203033, "signal/advantage_pre_scale_abs_mean": 0.05967723429203033, "signal/advantage_pre_scale_std": 0.10045831054449081, "signal/advantage_std": 0.10045831054449081, "signal/brier_reward/centered_abs_mean": 0.12030695676803589, "signal/brier_reward/group_bin_occupancy": 0.85625, "signal/brier_reward/group_std_mean": 0.15410683453083038, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01203069593757391, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01203069593757391, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.013250499032437801, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.867578125, "signal/confidence_uniqueness_reward/group_std_mean": 0.018012562207877635, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013250499032437801, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013250499032437801, "signal/format_reward/centered_abs_mean": 0.000933837890625, "signal/format_reward/group_bin_occupancy": 0.1265625, "signal/format_reward/group_std_mean": 0.002425827318802476, "signal/format_reward/group_zero_std_frac": 0.9875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004669189453125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0004669189453125, "signal/frontier_aurc_reward/centered_abs_mean": 0.0028906268067657947, "signal/frontier_aurc_reward/group_bin_occupancy": 0.7140625, "signal/frontier_aurc_reward/group_std_mean": 0.004852446913719177, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.613283406593837e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.613283406593837e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.16021213233470916, "signal/frontier_coverage_0/group_bin_occupancy": 0.864453125, "signal/frontier_coverage_0/group_std_mean": 0.2059779554605484, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0020026518031954765, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0020026518031954765, "signal/frontier_coverage_1/centered_abs_mean": 0.16021213233470916, "signal/frontier_coverage_1/group_bin_occupancy": 0.864453125, "signal/frontier_coverage_1/group_std_mean": 0.2059779554605484, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0020026518031954765, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020026518031954765, "signal/frontier_coverage_10/centered_abs_mean": 0.15919291973114014, "signal/frontier_coverage_10/group_bin_occupancy": 0.865234375, "signal/frontier_coverage_10/group_std_mean": 0.20466985404491425, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001989911496639252, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001989911496639252, "signal/frontier_coverage_15/centered_abs_mean": 0.15465636551380157, "signal/frontier_coverage_15/group_bin_occupancy": 0.86015625, "signal/frontier_coverage_15/group_std_mean": 0.19887435138225557, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0019332046154886483, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0019332046154886483, "signal/frontier_coverage_20/centered_abs_mean": 0.09457768499851227, "signal/frontier_coverage_20/group_bin_occupancy": 0.8609375, "signal/frontier_coverage_20/group_std_mean": 0.12258463650941849, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001182221109047532, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001182221109047532, "signal/frontier_coverage_25/centered_abs_mean": 0.05887412428855896, "signal/frontier_coverage_25/group_bin_occupancy": 0.93203125, "signal/frontier_coverage_25/group_std_mean": 0.0750406637787819, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007359265931881964, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007359265931881964, "signal/frontier_coverage_5/centered_abs_mean": 0.16021213233470916, "signal/frontier_coverage_5/group_bin_occupancy": 0.864453125, "signal/frontier_coverage_5/group_std_mean": 0.2059779554605484, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0020026518031954765, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020026518031954765, "signal/frontier_ece_reward/centered_abs_mean": 0.004312580823898316, "signal/frontier_ece_reward/group_bin_occupancy": 0.6515625, "signal/frontier_ece_reward/group_std_mean": 0.00546288751065731, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00043125808006152513, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00043125808006152513, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.1936631292104721, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.73671875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.25820142924785616, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01936631351709366, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01936631351709366, "step": 245 }, { "calibration/aurc": 0.1967928711004556, "calibration/batch_distribution_entropy": 0.9742510706268096, "calibration/batch_entropy_100bins": 0.9507340151497976, "calibration/batch_entropy_10bins": 0.9742510706268096, "calibration/batch_entropy_50bins": 0.972856437794905, "calibration/batch_uniqueness": 0.9643646240234375, "calibration/buffer_distribution_entropy": 0.9987200173841982, "calibration/buffer_entropy_100bins": 0.9845965356019161, "calibration/buffer_entropy_10bins": 0.9987200173841982, "calibration/buffer_entropy_50bins": 0.9954272022225975, "calibration/confidence_entropy": 0.4909248938909892, "calibration/coverage@0%": 0.046875, "calibration/coverage@1%": 0.046875, "calibration/coverage@10%": 0.280078125, "calibration/coverage@15%": 0.51328125, "calibration/coverage@20%": 0.592578125, "calibration/coverage@25%": 0.655859375, "calibration/coverage@30%": 0.7453125, "calibration/coverage@5%": 0.145703125, "calibration/ece": 0.11571823238789063, "calibration/mean_confidence": 0.4886489551121094, "calibration/prompt_uniqueness": 0.86171875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 576.6, "completions/max_terminated_length": 576.6, "completions/mean_length": 207.0623046875, "completions/mean_terminated_length": 207.0623046875, "completions/min_length": 96.8, "completions/min_terminated_length": 96.8, "epoch": 0.8, "grad_norm": 0.0010407047811895609, "learning_rate": 1e-06, "loss": 0.0002, "num_tokens": 849115962.0, "reward": 0.9649770140647889, "reward_std": 0.07340085953474045, "rewards/accuracy_reward": 0.58837890625, "rewards/brier_reward": 0.8119970202445984, "rewards/confidence_uniqueness_reward": 0.9659217834472656, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.00260907681658864, "rewards/frontier_coverage_0": 0.0923803374171257, "rewards/frontier_coverage_1": 0.0923803374171257, "rewards/frontier_coverage_10": 0.09237077087163925, "rewards/frontier_coverage_15": 0.08902214169502258, "rewards/frontier_coverage_20": 0.06303619369864463, "rewards/frontier_coverage_25": 0.04901153296232223, "rewards/frontier_coverage_5": 0.0923803374171257, "rewards/frontier_ece_reward": 0.0028331642039120196, "rewards/frontier_entropy_batch_reward": -0.14387290179729462, "signal/accuracy_reward/centered_abs_mean": 0.084429931640625, "signal/accuracy_reward/group_bin_occupancy": 0.165234375, "signal/accuracy_reward/group_std_mean": 0.11211418360471725, "signal/accuracy_reward/group_zero_std_frac": 0.678125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0422149658203125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0422149658203125, "signal/advantage_abs_mean": 0.05633275434374809, "signal/advantage_pre_scale_abs_mean": 0.05633275434374809, "signal/advantage_pre_scale_std": 0.09726964086294174, "signal/advantage_std": 0.09726964086294174, "signal/brier_reward/centered_abs_mean": 0.10657454878091813, "signal/brier_reward/group_bin_occupancy": 0.852734375, "signal/brier_reward/group_std_mean": 0.13808045983314515, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010657455027103423, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010657455027103423, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012256479263305664, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.877734375, "signal/confidence_uniqueness_reward/group_std_mean": 0.015532337687909603, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012256479589268566, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012256479589268566, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_bin_occupancy": 0.125, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0024628740502521396, "signal/frontier_aurc_reward/group_bin_occupancy": 0.696875, "signal/frontier_aurc_reward/group_std_mean": 0.004201717115938664, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.078592744714115e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.078592744714115e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.14268072247505187, "signal/frontier_coverage_0/group_bin_occupancy": 0.87421875, "signal/frontier_coverage_0/group_std_mean": 0.1830669164657593, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0017835090635344385, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0017835090635344385, "signal/frontier_coverage_1/centered_abs_mean": 0.14268072247505187, "signal/frontier_coverage_1/group_bin_occupancy": 0.87421875, "signal/frontier_coverage_1/group_std_mean": 0.1830669164657593, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0017835090635344385, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0017835090635344385, "signal/frontier_coverage_10/centered_abs_mean": 0.1418178841471672, "signal/frontier_coverage_10/group_bin_occupancy": 0.875390625, "signal/frontier_coverage_10/group_std_mean": 0.18199937641620637, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001772723556496203, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001772723556496203, "signal/frontier_coverage_15/centered_abs_mean": 0.1328089103102684, "signal/frontier_coverage_15/group_bin_occupancy": 0.871875, "signal/frontier_coverage_15/group_std_mean": 0.17064056396484376, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016601114068180323, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016601114068180323, "signal/frontier_coverage_20/centered_abs_mean": 0.08025226593017579, "signal/frontier_coverage_20/group_bin_occupancy": 0.875, "signal/frontier_coverage_20/group_std_mean": 0.10454665571451187, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00100315329618752, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00100315329618752, "signal/frontier_coverage_25/centered_abs_mean": 0.051503103226423264, "signal/frontier_coverage_25/group_bin_occupancy": 0.9390625, "signal/frontier_coverage_25/group_std_mean": 0.06675118654966354, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006437887786887586, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006437887786887586, "signal/frontier_coverage_5/centered_abs_mean": 0.14268072247505187, "signal/frontier_coverage_5/group_bin_occupancy": 0.87421875, "signal/frontier_coverage_5/group_std_mean": 0.1830669164657593, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0017835090635344385, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0017835090635344385, "signal/frontier_ece_reward/centered_abs_mean": 0.004069770174100995, "signal/frontier_ece_reward/group_bin_occupancy": 0.637890625, "signal/frontier_ece_reward/group_std_mean": 0.005134090967476368, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0004069770220667124, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0004069770220667124, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.19587229192256927, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.74921875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.2571956992149353, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01958722956478596, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01958722956478596, "step": 250 }, { "epoch": 0.8, "eval_calibration/aurc": 0.4496095895830745, "eval_calibration/batch_distribution_entropy": 0.9144992675252371, "eval_calibration/batch_entropy_100bins": 0.7043401650925953, "eval_calibration/batch_entropy_10bins": 0.9144992675252371, "eval_calibration/batch_entropy_50bins": 0.7778935861386687, "eval_calibration/batch_uniqueness": 0.9091796875, "eval_calibration/buffer_distribution_entropy": 0.9987979675445213, "eval_calibration/buffer_entropy_100bins": 0.9837722377098301, "eval_calibration/buffer_entropy_10bins": 0.9987979675445213, "eval_calibration/buffer_entropy_50bins": 0.9953801956155197, "eval_calibration/confidence_entropy": 0.5099341289029582, "eval_calibration/coverage@0%": 0.0546875, "eval_calibration/coverage@1%": 0.0546875, "eval_calibration/coverage@10%": 0.0546875, "eval_calibration/coverage@15%": 0.0546875, "eval_calibration/coverage@20%": 0.0625, "eval_calibration/coverage@25%": 0.203125, "eval_calibration/coverage@30%": 0.296875, "eval_calibration/coverage@5%": 0.0546875, "eval_calibration/ece": 0.19125000000000003, "eval_calibration/mean_confidence": 0.42765624999999996, "eval_calibration/prompt_uniqueness": 0.9091796875, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 418.0, "eval_completions/max_terminated_length": 418.0, "eval_completions/mean_length": 215.94012451171875, "eval_completions/mean_terminated_length": 215.94012451171875, "eval_completions/min_length": 121.75, "eval_completions/min_terminated_length": 121.75, "eval_loss": 0.0, "eval_num_tokens": 849115962.0, "eval_reward": 0.8419463336467743, "eval_reward_std": 0.23093872889876366, "eval_rewards/accuracy_reward": 0.44140625, "eval_rewards/brier_reward": 0.797715574502945, "eval_rewards/confidence_uniqueness_reward": 0.91015625, "eval_rewards/format_reward": 1.0, "eval_rewards/frontier_aurc_reward": -0.0027635535807348788, "eval_rewards/frontier_coverage_0": 0.16548816114664078, "eval_rewards/frontier_coverage_1": 0.16548816114664078, "eval_rewards/frontier_coverage_10": 0.16445041447877884, "eval_rewards/frontier_coverage_15": 0.15276920050382614, "eval_rewards/frontier_coverage_20": 0.09404854476451874, "eval_rewards/frontier_coverage_25": 0.05035925842821598, "eval_rewards/frontier_coverage_5": 0.16548816114664078, "eval_rewards/frontier_ece_reward": 0.00329311826499179, "eval_rewards/frontier_entropy_batch_reward": -0.6181488037109375, "eval_runtime": 21.5949, "eval_samples_per_second": 23.154, "eval_signal/accuracy_reward/centered_abs_mean": 0.479736328125, "eval_signal/accuracy_reward/group_bin_occupancy": 0.25, "eval_signal/accuracy_reward/group_std_mean": 0.4974188432097435, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2398681640625, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2398681640625, "eval_signal/advantage_abs_mean": 0.21659140661358833, "eval_signal/advantage_pre_scale_abs_mean": 0.21659140661358833, "eval_signal/advantage_pre_scale_std": 0.22850319370627403, "eval_signal/advantage_std": 0.22850319370627403, "eval_signal/brier_reward/centered_abs_mean": 0.17879249900579453, "eval_signal/brier_reward/group_bin_occupancy": 0.8828125, "eval_signal/brier_reward/group_std_mean": 0.23039625957608223, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017879250459372997, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.017879250459372997, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0334930419921875, "eval_signal/confidence_uniqueness_reward/group_bin_occupancy": 0.34375, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.03927971515804529, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003349304257426411, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003349304257426411, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_bin_occupancy": 0.125, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.003261869656853378, "eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.6484375, "eval_signal/frontier_aurc_reward/group_std_mean": 0.006061125197447836, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.0773371438262984e-05, "eval_signal/frontier_aurc_reward/weight": 0.012500000186264515, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.0773371438262984e-05, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.3526075705885887, "eval_signal/frontier_coverage_0/group_bin_occupancy": 0.96875, "eval_signal/frontier_coverage_0/group_std_mean": 0.434835322201252, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.004407594562508166, "eval_signal/frontier_coverage_0/weight": 0.012500000186264515, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004407594562508166, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.3526075705885887, "eval_signal/frontier_coverage_1/group_bin_occupancy": 0.96875, "eval_signal/frontier_coverage_1/group_std_mean": 0.434835322201252, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004407594562508166, "eval_signal/frontier_coverage_1/weight": 0.012500000186264515, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004407594562508166, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.34960638731718063, "eval_signal/frontier_coverage_10/group_bin_occupancy": 0.96875, "eval_signal/frontier_coverage_10/group_std_mean": 0.43143124133348465, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00437007995788008, "eval_signal/frontier_coverage_10/weight": 0.012500000186264515, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00437007995788008, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.3217253088951111, "eval_signal/frontier_coverage_15/group_bin_occupancy": 0.953125, "eval_signal/frontier_coverage_15/group_std_mean": 0.39926163107156754, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00402156647760421, "eval_signal/frontier_coverage_15/weight": 0.012500000186264515, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00402156647760421, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.1837029866874218, "eval_signal/frontier_coverage_20/group_bin_occupancy": 0.9375, "eval_signal/frontier_coverage_20/group_std_mean": 0.23896615207195282, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0022962873918004334, "eval_signal/frontier_coverage_20/weight": 0.012500000186264515, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0022962873918004334, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.08366492204368114, "eval_signal/frontier_coverage_25/group_bin_occupancy": 0.953125, "eval_signal/frontier_coverage_25/group_std_mean": 0.11145731434226036, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010458114848006517, "eval_signal/frontier_coverage_25/weight": 0.012500000186264515, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010458114848006517, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.3526075705885887, "eval_signal/frontier_coverage_5/group_bin_occupancy": 0.96875, "eval_signal/frontier_coverage_5/group_std_mean": 0.434835322201252, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004407594562508166, "eval_signal/frontier_coverage_5/weight": 0.012500000186264515, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004407594562508166, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.005593743873760104, "eval_signal/frontier_ece_reward/group_bin_occupancy": 0.921875, "eval_signal/frontier_ece_reward/group_std_mean": 0.007015189039520919, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005593743990175426, "eval_signal/frontier_ece_reward/weight": 0.10000000149011612, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005593743990175426, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31342506408691406, "eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.25, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.33213482052087784, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.031342506408691406, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.031342506408691406, "eval_steps_per_second": 0.185, "step": 250 }, { "calibration/aurc": 0.22865828436295063, "calibration/batch_distribution_entropy": 0.9663037413073996, "calibration/batch_entropy_100bins": 0.9474363632028673, "calibration/batch_entropy_10bins": 0.9663037413073996, "calibration/batch_entropy_50bins": 0.9671213935238819, "calibration/batch_uniqueness": 0.96414794921875, "calibration/buffer_distribution_entropy": 0.9987440315376857, "calibration/buffer_entropy_100bins": 0.9831135431606464, "calibration/buffer_entropy_10bins": 0.9987440315376857, "calibration/buffer_entropy_50bins": 0.9953063778161239, "calibration/confidence_entropy": 0.47730441676015234, "calibration/coverage@0%": 0.01484375, "calibration/coverage@1%": 0.01484375, "calibration/coverage@10%": 0.165234375, "calibration/coverage@15%": 0.241015625, "calibration/coverage@20%": 0.522265625, "calibration/coverage@25%": 0.65703125, "calibration/coverage@30%": 0.797265625, "calibration/coverage@5%": 0.051953125, "calibration/ece": 0.14734528249960935, "calibration/mean_confidence": 0.5190219050003906, "calibration/prompt_uniqueness": 0.86396484375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 461.8, "completions/max_terminated_length": 461.8, "completions/mean_length": 210.773828125, "completions/mean_terminated_length": 210.773828125, "completions/min_length": 99.4, "completions/min_terminated_length": 99.4, "epoch": 0.816, "grad_norm": 0.0009977244772017002, "learning_rate": 1e-06, "loss": 0.0001, "num_tokens": 866373454.0, "reward": 0.9607008934020996, "reward_std": 0.0780528113245964, "rewards/accuracy_reward": 0.58310546875, "rewards/brier_reward": 0.7890378952026367, "rewards/confidence_uniqueness_reward": 0.9668556213378906, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.0027697683311998845, "rewards/frontier_coverage_0": 0.06974590048193932, "rewards/frontier_coverage_1": 0.06974590048193932, "rewards/frontier_coverage_10": 0.06944515407085419, "rewards/frontier_coverage_15": 0.0675901010632515, "rewards/frontier_coverage_20": 0.05418720170855522, "rewards/frontier_coverage_25": 0.0440024096518755, "rewards/frontier_coverage_5": 0.0696968175470829, "rewards/frontier_ece_reward": 0.0021910452749580147, "rewards/frontier_entropy_batch_reward": -0.12180853635072708, "signal/accuracy_reward/centered_abs_mean": 0.091473388671875, "signal/accuracy_reward/group_bin_occupancy": 0.169921875, "signal/accuracy_reward/group_std_mean": 0.12305806428194047, "signal/accuracy_reward/group_zero_std_frac": 0.640625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0457366943359375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0457366943359375, "signal/advantage_abs_mean": 0.05912850499153137, "signal/advantage_pre_scale_abs_mean": 0.05912850499153137, "signal/advantage_pre_scale_std": 0.10116375237703323, "signal/advantage_std": 0.10116375237703323, "signal/brier_reward/centered_abs_mean": 0.11915633082389832, "signal/brier_reward/group_bin_occupancy": 0.8625, "signal/brier_reward/group_std_mean": 0.15226575136184692, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011915633082389831, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011915633082389831, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.01175994873046875, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.875, "signal/confidence_uniqueness_reward/group_std_mean": 0.014767202734947204, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001175994914956391, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001175994914956391, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_bin_occupancy": 0.125, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0026121003553271293, "signal/frontier_aurc_reward/group_bin_occupancy": 0.709375, "signal/frontier_aurc_reward/group_std_mean": 0.004367161309346557, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.265125487814657e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.265125487814657e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.1560654640197754, "signal/frontier_coverage_0/group_bin_occupancy": 0.87109375, "signal/frontier_coverage_0/group_std_mean": 0.1990586817264557, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001950818463228643, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001950818463228643, "signal/frontier_coverage_1/centered_abs_mean": 0.1560654640197754, "signal/frontier_coverage_1/group_bin_occupancy": 0.87109375, "signal/frontier_coverage_1/group_std_mean": 0.1990586817264557, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001950818463228643, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001950818463228643, "signal/frontier_coverage_10/centered_abs_mean": 0.15450561046600342, "signal/frontier_coverage_10/group_bin_occupancy": 0.869921875, "signal/frontier_coverage_10/group_std_mean": 0.19710008800029755, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001931320084258914, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001931320084258914, "signal/frontier_coverage_15/centered_abs_mean": 0.14276299774646758, "signal/frontier_coverage_15/group_bin_occupancy": 0.863671875, "signal/frontier_coverage_15/group_std_mean": 0.1824551671743393, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0017845374997705222, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0017845374997705222, "signal/frontier_coverage_20/centered_abs_mean": 0.08745990991592408, "signal/frontier_coverage_20/group_bin_occupancy": 0.877734375, "signal/frontier_coverage_20/group_std_mean": 0.11309091001749039, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001093248906545341, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001093248906545341, "signal/frontier_coverage_25/centered_abs_mean": 0.05504238083958626, "signal/frontier_coverage_25/group_bin_occupancy": 0.92265625, "signal/frontier_coverage_25/group_std_mean": 0.07104835510253907, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006880297674797476, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006880297674797476, "signal/frontier_coverage_5/centered_abs_mean": 0.15581389665603637, "signal/frontier_coverage_5/group_bin_occupancy": 0.87109375, "signal/frontier_coverage_5/group_std_mean": 0.1987439811229706, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0019476738292723895, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019476738292723895, "signal/frontier_ece_reward/centered_abs_mean": 0.003923707129433751, "signal/frontier_ece_reward/group_bin_occupancy": 0.658203125, "signal/frontier_ece_reward/group_std_mean": 0.00497177829965949, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0003923707117792219, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0003923707117792219, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.18463816046714782, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.74375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.2472657859325409, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01846381649374962, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01846381649374962, "step": 255 }, { "calibration/aurc": 0.2778293031515605, "calibration/batch_distribution_entropy": 0.9723484188459602, "calibration/batch_entropy_100bins": 0.9456738301824512, "calibration/batch_entropy_10bins": 0.9723484188459602, "calibration/batch_entropy_50bins": 0.968407087893359, "calibration/batch_uniqueness": 0.962615966796875, "calibration/buffer_distribution_entropy": 0.9986561322880023, "calibration/buffer_entropy_100bins": 0.9815412495509671, "calibration/buffer_entropy_10bins": 0.9986561322880023, "calibration/buffer_entropy_50bins": 0.9951468722609143, "calibration/confidence_entropy": 0.4893640574141166, "calibration/coverage@0%": 0.034375, "calibration/coverage@1%": 0.034375, "calibration/coverage@10%": 0.24140625, "calibration/coverage@15%": 0.296875, "calibration/coverage@20%": 0.34765625, "calibration/coverage@25%": 0.4203125, "calibration/coverage@30%": 0.58828125, "calibration/coverage@5%": 0.151953125, "calibration/ece": 0.11994438798437498, "calibration/mean_confidence": 0.480172799515625, "calibration/prompt_uniqueness": 0.86845703125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 692.4, "completions/max_terminated_length": 512.6, "completions/mean_length": 218.40234375, "completions/mean_terminated_length": 218.27389831542968, "completions/min_length": 100.2, "completions/min_terminated_length": 100.2, "epoch": 0.832, "grad_norm": 0.0008983217994682491, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 883618246.0, "reward": 0.9495777606964111, "reward_std": 0.07850262373685837, "rewards/accuracy_reward": 0.553125, "rewards/brier_reward": 0.8154358863830566, "rewards/confidence_uniqueness_reward": 0.9653682827949523, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.002407958754338324, "rewards/frontier_coverage_0": 0.11853809803724288, "rewards/frontier_coverage_1": 0.11853809803724288, "rewards/frontier_coverage_10": 0.11834341883659363, "rewards/frontier_coverage_15": 0.10681554824113845, "rewards/frontier_coverage_20": 0.07330435365438462, "rewards/frontier_coverage_25": 0.05658877268433571, "rewards/frontier_coverage_5": 0.11847927272319794, "rewards/frontier_ece_reward": 0.002967313444241881, "rewards/frontier_entropy_batch_reward": -0.14165550619363784, "signal/accuracy_reward/centered_abs_mean": 0.0945068359375, "signal/accuracy_reward/group_bin_occupancy": 0.1703125, "signal/accuracy_reward/group_std_mean": 0.12645548731088638, "signal/accuracy_reward/group_zero_std_frac": 0.6375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04725341796875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04725341796875, "signal/advantage_abs_mean": 0.059925134479999545, "signal/advantage_pre_scale_abs_mean": 0.059925134479999545, "signal/advantage_pre_scale_std": 0.10259477943181991, "signal/advantage_std": 0.10259477943181991, "signal/brier_reward/centered_abs_mean": 0.10476993620395661, "signal/brier_reward/group_bin_occupancy": 0.849609375, "signal/brier_reward/group_std_mean": 0.134723761677742, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010476993769407273, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010476993769407273, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012731090188026428, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.86875, "signal/confidence_uniqueness_reward/group_std_mean": 0.016239034570753576, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012731090188026427, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012731090188026427, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_bin_occupancy": 0.125390625, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.002151795034296811, "signal/frontier_aurc_reward/group_bin_occupancy": 0.723828125, "signal/frontier_aurc_reward/group_std_mean": 0.0035023723961785437, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.6897438874584623e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.6897438874584623e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.14922354221343995, "signal/frontier_coverage_0/group_bin_occupancy": 0.86015625, "signal/frontier_coverage_0/group_std_mean": 0.1917937785387039, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0018652942962944508, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0018652942962944508, "signal/frontier_coverage_1/centered_abs_mean": 0.14922354221343995, "signal/frontier_coverage_1/group_bin_occupancy": 0.86015625, "signal/frontier_coverage_1/group_std_mean": 0.1917937785387039, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0018652942962944508, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0018652942962944508, "signal/frontier_coverage_10/centered_abs_mean": 0.14830959737300872, "signal/frontier_coverage_10/group_bin_occupancy": 0.861328125, "signal/frontier_coverage_10/group_std_mean": 0.1906294882297516, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018538699485361576, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018538699485361576, "signal/frontier_coverage_15/centered_abs_mean": 0.13270397633314132, "signal/frontier_coverage_15/group_bin_occupancy": 0.862890625, "signal/frontier_coverage_15/group_std_mean": 0.1706369161605835, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016587997553870082, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016587997553870082, "signal/frontier_coverage_20/centered_abs_mean": 0.08176114857196808, "signal/frontier_coverage_20/group_bin_occupancy": 0.88828125, "signal/frontier_coverage_20/group_std_mean": 0.10538419336080551, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010220143478363753, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010220143478363753, "signal/frontier_coverage_25/centered_abs_mean": 0.05487861037254334, "signal/frontier_coverage_25/group_bin_occupancy": 0.926953125, "signal/frontier_coverage_25/group_std_mean": 0.06957651078701019, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006859826273284853, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006859826273284853, "signal/frontier_coverage_5/centered_abs_mean": 0.14910914599895478, "signal/frontier_coverage_5/group_bin_occupancy": 0.86015625, "signal/frontier_coverage_5/group_std_mean": 0.1916535586118698, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018638643436133862, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018638643436133862, "signal/frontier_ece_reward/centered_abs_mean": 0.003827466629445553, "signal/frontier_ece_reward/group_bin_occupancy": 0.633203125, "signal/frontier_ece_reward/group_std_mean": 0.004841751419007778, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0003827466571237892, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0003827466571237892, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.19984618723392486, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7828125, "signal/frontier_entropy_batch_reward/group_std_mean": 0.2566663324832916, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.019984618946909904, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.019984618946909904, "step": 260 }, { "calibration/aurc": 0.3167713713435312, "calibration/batch_distribution_entropy": 0.9752872878231809, "calibration/batch_entropy_100bins": 0.9470149013132019, "calibration/batch_entropy_10bins": 0.9752872878231809, "calibration/batch_entropy_50bins": 0.9694887062876333, "calibration/batch_uniqueness": 0.9655956675797309, "calibration/buffer_distribution_entropy": 0.9986996758924558, "calibration/buffer_entropy_100bins": 0.9795660954880914, "calibration/buffer_entropy_10bins": 0.9986996758924558, "calibration/buffer_entropy_50bins": 0.9950240138509777, "calibration/confidence_entropy": 0.49941824109751864, "calibration/coverage@0%": 0.03948905332681017, "calibration/coverage@1%": 0.06258102984344423, "calibration/coverage@10%": 0.2123455846379648, "calibration/coverage@15%": 0.3135373348825832, "calibration/coverage@20%": 0.39717312866927595, "calibration/coverage@25%": 0.47103183096868884, "calibration/coverage@30%": 0.5230140044031311, "calibration/coverage@5%": 0.14667471868884538, "calibration/ece": 0.1526993445146855, "calibration/mean_confidence": 0.5272793125303242, "calibration/prompt_uniqueness": 0.8712022287168313, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 520.2, "completions/max_terminated_length": 520.2, "completions/mean_length": 221.2171875, "completions/mean_terminated_length": 221.2171875, "completions/min_length": 103.6, "completions/min_terminated_length": 103.6, "epoch": 0.848, "grad_norm": 0.0008509070612490177, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 900897878.0, "reward": 0.9409607172012329, "reward_std": 0.0743522085249424, "rewards/accuracy_reward": 0.53916015625, "rewards/brier_reward": 0.7976258873939515, "rewards/confidence_uniqueness_reward": 0.9661872863769532, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.002944446820765734, "rewards/frontier_coverage_0": 0.10507128238677979, "rewards/frontier_coverage_1": 0.10507128238677979, "rewards/frontier_coverage_10": 0.10452338606119156, "rewards/frontier_coverage_15": 0.09657607525587082, "rewards/frontier_coverage_20": 0.05965607911348343, "rewards/frontier_coverage_25": 0.045341891795396806, "rewards/frontier_coverage_5": 0.1049825593829155, "rewards/frontier_ece_reward": 0.0027819779235869644, "rewards/frontier_entropy_batch_reward": -0.12909687906503678, "signal/accuracy_reward/centered_abs_mean": 0.078118896484375, "signal/accuracy_reward/group_bin_occupancy": 0.166015625, "signal/accuracy_reward/group_std_mean": 0.10906965583562851, "signal/accuracy_reward/group_zero_std_frac": 0.671875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0390594482421875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0390594482421875, "signal/advantage_abs_mean": 0.05558159351348877, "signal/advantage_pre_scale_abs_mean": 0.05558159351348877, "signal/advantage_pre_scale_std": 0.09597710967063904, "signal/advantage_std": 0.09597710967063904, "signal/brier_reward/centered_abs_mean": 0.11261094510555267, "signal/brier_reward/group_bin_occupancy": 0.868359375, "signal/brier_reward/group_std_mean": 0.14441443979740143, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0112610949203372, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.0112610949203372, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012626891583204269, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.853125, "signal/confidence_uniqueness_reward/group_std_mean": 0.016479195840656758, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012626891722902656, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012626891722902656, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_bin_occupancy": 0.12578125, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.002747892402112484, "signal/frontier_aurc_reward/group_bin_occupancy": 0.703125, "signal/frontier_aurc_reward/group_std_mean": 0.004585493355989456, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.434865539020393e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.434865539020393e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.14814209938049316, "signal/frontier_coverage_0/group_bin_occupancy": 0.886328125, "signal/frontier_coverage_0/group_std_mean": 0.19057103991508484, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0018517762422561646, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0018517762422561646, "signal/frontier_coverage_1/centered_abs_mean": 0.14814209938049316, "signal/frontier_coverage_1/group_bin_occupancy": 0.886328125, "signal/frontier_coverage_1/group_std_mean": 0.19057103991508484, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0018517762422561646, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0018517762422561646, "signal/frontier_coverage_10/centered_abs_mean": 0.14656879603862763, "signal/frontier_coverage_10/group_bin_occupancy": 0.88515625, "signal/frontier_coverage_10/group_std_mean": 0.18858122825622559, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018321099691092969, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018321099691092969, "signal/frontier_coverage_15/centered_abs_mean": 0.13467406630516052, "signal/frontier_coverage_15/group_bin_occupancy": 0.878515625, "signal/frontier_coverage_15/group_std_mean": 0.17328309118747712, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016834259033203125, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016834259033203125, "signal/frontier_coverage_20/centered_abs_mean": 0.08421734273433686, "signal/frontier_coverage_20/group_bin_occupancy": 0.9078125, "signal/frontier_coverage_20/group_std_mean": 0.10839319676160812, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010527168167755007, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010527168167755007, "signal/frontier_coverage_25/centered_abs_mean": 0.054951707273721694, "signal/frontier_coverage_25/group_bin_occupancy": 0.9265625, "signal/frontier_coverage_25/group_std_mean": 0.07051893323659897, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006868963362649083, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006868963362649083, "signal/frontier_coverage_5/centered_abs_mean": 0.1478155016899109, "signal/frontier_coverage_5/group_bin_occupancy": 0.8859375, "signal/frontier_coverage_5/group_std_mean": 0.1901459276676178, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018476937897503377, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018476937897503377, "signal/frontier_ece_reward/centered_abs_mean": 0.0038679220713675024, "signal/frontier_ece_reward/group_bin_occupancy": 0.658984375, "signal/frontier_ece_reward/group_std_mean": 0.00496214609593153, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00038679222343489527, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00038679222343489527, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.18982007205486298, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.794921875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.24912202656269072, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01898200698196888, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01898200698196888, "step": 265 }, { "calibration/aurc": 0.2768838924896155, "calibration/batch_distribution_entropy": 0.9617687964595516, "calibration/batch_entropy_100bins": 0.9343288350446255, "calibration/batch_entropy_10bins": 0.9617687964595516, "calibration/batch_entropy_50bins": 0.9600482617272046, "calibration/batch_uniqueness": 0.966256189324948, "calibration/buffer_distribution_entropy": 0.9987696399448172, "calibration/buffer_entropy_100bins": 0.9778730418080352, "calibration/buffer_entropy_10bins": 0.9987696399448172, "calibration/buffer_entropy_50bins": 0.9949262133230533, "calibration/confidence_entropy": 0.49433436483942234, "calibration/coverage@0%": 0.004296875, "calibration/coverage@1%": 0.004296875, "calibration/coverage@10%": 0.11484375, "calibration/coverage@15%": 0.1546875, "calibration/coverage@20%": 0.284375, "calibration/coverage@25%": 0.42265625, "calibration/coverage@30%": 0.493359375, "calibration/coverage@5%": 0.060546875, "calibration/ece": 0.13839882712774432, "calibration/mean_confidence": 0.600460792490651, "calibration/prompt_uniqueness": 0.881765921728668, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 970.0, "completions/max_terminated_length": 566.6, "completions/mean_length": 223.150390625, "completions/mean_terminated_length": 222.89442138671876, "completions/min_length": 108.2, "completions/min_terminated_length": 108.2, "epoch": 0.864, "grad_norm": 0.0009654518216848373, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 918169754.0, "reward": 0.9622796416282654, "reward_std": 0.07733558416366577, "rewards/accuracy_reward": 0.5888671875, "rewards/brier_reward": 0.7954235196113586, "rewards/confidence_uniqueness_reward": 0.9653422832489014, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.002754275593906641, "rewards/frontier_coverage_0": 0.07536681443452835, "rewards/frontier_coverage_1": 0.07536681443452835, "rewards/frontier_coverage_10": 0.07511008605360985, "rewards/frontier_coverage_15": 0.06949677914381028, "rewards/frontier_coverage_20": 0.052758050709962846, "rewards/frontier_coverage_25": 0.047069764137268065, "rewards/frontier_coverage_5": 0.07536681443452835, "rewards/frontier_ece_reward": 0.0024394527310505508, "rewards/frontier_entropy_batch_reward": -0.14224078506231308, "signal/accuracy_reward/centered_abs_mean": 0.08975830078125, "signal/accuracy_reward/group_bin_occupancy": 0.167578125, "signal/accuracy_reward/group_std_mean": 0.11916272044181823, "signal/accuracy_reward/group_zero_std_frac": 0.659375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.044879150390625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.044879150390625, "signal/advantage_abs_mean": 0.059315939992666246, "signal/advantage_pre_scale_abs_mean": 0.059315939992666246, "signal/advantage_pre_scale_std": 0.10067972093820572, "signal/advantage_std": 0.10067972093820572, "signal/brier_reward/centered_abs_mean": 0.11611681431531906, "signal/brier_reward/group_bin_occupancy": 0.86328125, "signal/brier_reward/group_std_mean": 0.1489056169986725, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011611681431531906, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011611681431531906, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.013113933056592942, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.87734375, "signal/confidence_uniqueness_reward/group_std_mean": 0.017149509117007255, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013113933615386485, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013113933615386485, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_bin_occupancy": 0.12578125, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0027526959776878355, "signal/frontier_aurc_reward/group_bin_occupancy": 0.719140625, "signal/frontier_aurc_reward/group_std_mean": 0.00466503887437284, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.440870204940438e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.440870204940438e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.15471426248550416, "signal/frontier_coverage_0/group_bin_occupancy": 0.876953125, "signal/frontier_coverage_0/group_std_mean": 0.19702006578445436, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0019339283695444464, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019339283695444464, "signal/frontier_coverage_1/centered_abs_mean": 0.15471426248550416, "signal/frontier_coverage_1/group_bin_occupancy": 0.876953125, "signal/frontier_coverage_1/group_std_mean": 0.19702006578445436, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019339283695444464, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019339283695444464, "signal/frontier_coverage_10/centered_abs_mean": 0.15364649891853333, "signal/frontier_coverage_10/group_bin_occupancy": 0.876953125, "signal/frontier_coverage_10/group_std_mean": 0.19570617973804474, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001920581259764731, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001920581259764731, "signal/frontier_coverage_15/centered_abs_mean": 0.13317597806453704, "signal/frontier_coverage_15/group_bin_occupancy": 0.867578125, "signal/frontier_coverage_15/group_std_mean": 0.1696739375591278, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016646997770294546, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016646997770294546, "signal/frontier_coverage_20/centered_abs_mean": 0.08484991490840912, "signal/frontier_coverage_20/group_bin_occupancy": 0.895703125, "signal/frontier_coverage_20/group_std_mean": 0.10823871493339539, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010606239549815655, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010606239549815655, "signal/frontier_coverage_25/centered_abs_mean": 0.05707306563854218, "signal/frontier_coverage_25/group_bin_occupancy": 0.928515625, "signal/frontier_coverage_25/group_std_mean": 0.07243188172578811, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007134133367799223, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007134133367799223, "signal/frontier_coverage_5/centered_abs_mean": 0.15471426248550416, "signal/frontier_coverage_5/group_bin_occupancy": 0.876953125, "signal/frontier_coverage_5/group_std_mean": 0.19702006578445436, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0019339283695444464, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019339283695444464, "signal/frontier_ece_reward/centered_abs_mean": 0.004025649838149547, "signal/frontier_ece_reward/group_bin_occupancy": 0.6640625, "signal/frontier_ece_reward/group_std_mean": 0.0050605999305844305, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0004025649803224951, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0004025649803224951, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.1900925815105438, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.792578125, "signal/frontier_entropy_batch_reward/group_std_mean": 0.25062963366508484, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.019009258598089218, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.019009258598089218, "step": 270 }, { "calibration/aurc": 0.3686495707449447, "calibration/batch_distribution_entropy": 0.9786163924820228, "calibration/batch_entropy_100bins": 0.9506496876710446, "calibration/batch_entropy_10bins": 0.9786163924820228, "calibration/batch_entropy_50bins": 0.973175814835131, "calibration/batch_uniqueness": 0.964288330078125, "calibration/buffer_distribution_entropy": 0.9985241376950669, "calibration/buffer_entropy_100bins": 0.9767269257308445, "calibration/buffer_entropy_10bins": 0.9985241376950669, "calibration/buffer_entropy_50bins": 0.9947127581671238, "calibration/confidence_entropy": 0.4652774799868107, "calibration/coverage@0%": 0.0109375, "calibration/coverage@1%": 0.0109375, "calibration/coverage@10%": 0.055078125, "calibration/coverage@15%": 0.14765625, "calibration/coverage@20%": 0.2, "calibration/coverage@25%": 0.250390625, "calibration/coverage@30%": 0.327734375, "calibration/coverage@5%": 0.0234375, "calibration/ece": 0.15863184056992186, "calibration/mean_confidence": 0.5096962844300781, "calibration/prompt_uniqueness": 0.8615234375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 550.8, "completions/max_terminated_length": 550.8, "completions/mean_length": 222.6953125, "completions/mean_terminated_length": 222.6953125, "completions/min_length": 101.8, "completions/min_terminated_length": 101.8, "epoch": 0.88, "grad_norm": 0.0008373066666536033, "learning_rate": 1e-06, "loss": -0.0001, "num_tokens": 935597226.0, "reward": 0.9281533002853394, "reward_std": 0.0796884223818779, "rewards/accuracy_reward": 0.51435546875, "rewards/brier_reward": 0.7964529275894165, "rewards/confidence_uniqueness_reward": 0.9651763916015625, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.003106454946100712, "rewards/frontier_coverage_0": 0.11970811784267425, "rewards/frontier_coverage_1": 0.11970811784267425, "rewards/frontier_coverage_10": 0.11946074962615967, "rewards/frontier_coverage_15": 0.10414378494024276, "rewards/frontier_coverage_20": 0.07021132558584213, "rewards/frontier_coverage_25": 0.05124953538179398, "rewards/frontier_coverage_5": 0.11970811784267425, "rewards/frontier_ece_reward": 0.0030206756200641394, "rewards/frontier_entropy_batch_reward": -0.14252967536449432, "signal/accuracy_reward/centered_abs_mean": 0.091741943359375, "signal/accuracy_reward/group_bin_occupancy": 0.17109375, "signal/accuracy_reward/group_std_mean": 0.12339542210102081, "signal/accuracy_reward/group_zero_std_frac": 0.63125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0458709716796875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0458709716796875, "signal/advantage_abs_mean": 0.061288871616125104, "signal/advantage_pre_scale_abs_mean": 0.061288871616125104, "signal/advantage_pre_scale_std": 0.10269816666841507, "signal/advantage_std": 0.10269816666841507, "signal/brier_reward/centered_abs_mean": 0.12020632475614548, "signal/brier_reward/group_bin_occupancy": 0.840234375, "signal/brier_reward/group_std_mean": 0.15456699430942536, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012020632438361645, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012020632438361645, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.01308910846710205, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.85390625, "signal/confidence_uniqueness_reward/group_std_mean": 0.016654768399894236, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001308910851366818, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001308910851366818, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_bin_occupancy": 0.125, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0031963346991688013, "signal/frontier_aurc_reward/group_bin_occupancy": 0.694921875, "signal/frontier_aurc_reward/group_std_mean": 0.005663991440087557, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.9954184467205776e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.9954184467205776e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.16143686175346375, "signal/frontier_coverage_0/group_bin_occupancy": 0.867578125, "signal/frontier_coverage_0/group_std_mean": 0.20570681393146514, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0020179608603939415, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0020179608603939415, "signal/frontier_coverage_1/centered_abs_mean": 0.16143686175346375, "signal/frontier_coverage_1/group_bin_occupancy": 0.867578125, "signal/frontier_coverage_1/group_std_mean": 0.20570681393146514, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0020179608603939415, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020179608603939415, "signal/frontier_coverage_10/centered_abs_mean": 0.16061599552631378, "signal/frontier_coverage_10/group_bin_occupancy": 0.8671875, "signal/frontier_coverage_10/group_std_mean": 0.2046827495098114, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0020077000837773083, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020077000837773083, "signal/frontier_coverage_15/centered_abs_mean": 0.1376311719417572, "signal/frontier_coverage_15/group_bin_occupancy": 0.85625, "signal/frontier_coverage_15/group_std_mean": 0.17559443712234496, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001720389723777771, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001720389723777771, "signal/frontier_coverage_20/centered_abs_mean": 0.08813889771699905, "signal/frontier_coverage_20/group_bin_occupancy": 0.88125, "signal/frontier_coverage_20/group_std_mean": 0.11294594407081604, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0011017362354323267, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011017362354323267, "signal/frontier_coverage_25/centered_abs_mean": 0.05825449377298355, "signal/frontier_coverage_25/group_bin_occupancy": 0.927734375, "signal/frontier_coverage_25/group_std_mean": 0.07462759166955948, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007281811907887459, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007281811907887459, "signal/frontier_coverage_5/centered_abs_mean": 0.16143686175346375, "signal/frontier_coverage_5/group_bin_occupancy": 0.867578125, "signal/frontier_coverage_5/group_std_mean": 0.20570681393146514, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0020179608603939415, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020179608603939415, "signal/frontier_ece_reward/centered_abs_mean": 0.004061997029930353, "signal/frontier_ece_reward/group_bin_occupancy": 0.656640625, "signal/frontier_ece_reward/group_std_mean": 0.005123640317469836, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00040619971114210787, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00040619971114210787, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.19641498029232024, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7671875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.2599118322134018, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01964149847626686, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01964149847626686, "step": 275 }, { "calibration/aurc": 0.35893480202497713, "calibration/batch_distribution_entropy": 0.9836316827385708, "calibration/batch_entropy_100bins": 0.951102171596286, "calibration/batch_entropy_10bins": 0.9836316827385708, "calibration/batch_entropy_50bins": 0.9744830634854351, "calibration/batch_uniqueness": 0.9661264253561747, "calibration/buffer_distribution_entropy": 0.9982682590269872, "calibration/buffer_entropy_100bins": 0.9763659335051681, "calibration/buffer_entropy_10bins": 0.9982682590269872, "calibration/buffer_entropy_50bins": 0.9945770986362333, "calibration/confidence_entropy": 0.49407042141491697, "calibration/coverage@0%": 0.0125, "calibration/coverage@1%": 0.0125, "calibration/coverage@10%": 0.0390625, "calibration/coverage@15%": 0.044921875, "calibration/coverage@20%": 0.16015625, "calibration/coverage@25%": 0.3020907228473581, "calibration/coverage@30%": 0.47141557607632095, "calibration/coverage@5%": 0.018359375, "calibration/ece": 0.1521837402607701, "calibration/mean_confidence": 0.5152094586979612, "calibration/prompt_uniqueness": 0.865876762080515, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 700.0, "completions/max_terminated_length": 506.8, "completions/mean_length": 226.031640625, "completions/mean_terminated_length": 225.90355529785157, "completions/min_length": 99.4, "completions/min_terminated_length": 99.4, "epoch": 0.896, "grad_norm": 0.0007808567606844008, "learning_rate": 1e-06, "loss": 0.0007, "num_tokens": 953022638.0, "reward": 0.9413276553153992, "reward_std": 0.0753513365983963, "rewards/accuracy_reward": 0.54345703125, "rewards/brier_reward": 0.792955505847931, "rewards/confidence_uniqueness_reward": 0.9654982805252075, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.00300391623750329, "rewards/frontier_coverage_0": 0.10249666720628739, "rewards/frontier_coverage_1": 0.10249666720628739, "rewards/frontier_coverage_10": 0.10201466381549835, "rewards/frontier_coverage_15": 0.0916542112827301, "rewards/frontier_coverage_20": 0.06445520780980588, "rewards/frontier_coverage_25": 0.04657732546329498, "rewards/frontier_coverage_5": 0.10260441452264786, "rewards/frontier_ece_reward": 0.002176952688023448, "rewards/frontier_entropy_batch_reward": -0.1403130456805229, "signal/accuracy_reward/centered_abs_mean": 0.084613037109375, "signal/accuracy_reward/group_bin_occupancy": 0.169921875, "signal/accuracy_reward/group_std_mean": 0.11815287619829178, "signal/accuracy_reward/group_zero_std_frac": 0.640625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0423065185546875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0423065185546875, "signal/advantage_abs_mean": 0.056077169626951216, "signal/advantage_pre_scale_abs_mean": 0.056077169626951216, "signal/advantage_pre_scale_std": 0.09611705392599106, "signal/advantage_std": 0.09611705392599106, "signal/brier_reward/centered_abs_mean": 0.11416497230529785, "signal/brier_reward/group_bin_occupancy": 0.862890625, "signal/brier_reward/group_std_mean": 0.1458996891975403, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011416497454047204, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011416497454047204, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.01277841292321682, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.863671875, "signal/confidence_uniqueness_reward/group_std_mean": 0.016587360575795174, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012778413249179721, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012778413249179721, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_bin_occupancy": 0.125390625, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.0026259610895067453, "signal/frontier_aurc_reward/group_bin_occupancy": 0.7, "signal/frontier_aurc_reward/group_std_mean": 0.004510869830846786, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.282451471022796e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.282451471022796e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.15629157423973083, "signal/frontier_coverage_0/group_bin_occupancy": 0.870703125, "signal/frontier_coverage_0/group_std_mean": 0.20013498663902282, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0019536447478458287, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019536447478458287, "signal/frontier_coverage_1/centered_abs_mean": 0.15629157423973083, "signal/frontier_coverage_1/group_bin_occupancy": 0.870703125, "signal/frontier_coverage_1/group_std_mean": 0.20013498663902282, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019536447478458287, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019536447478458287, "signal/frontier_coverage_10/centered_abs_mean": 0.15510292947292328, "signal/frontier_coverage_10/group_bin_occupancy": 0.870703125, "signal/frontier_coverage_10/group_std_mean": 0.19859937131404876, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019387866836041213, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019387866836041213, "signal/frontier_coverage_15/centered_abs_mean": 0.13360550701618196, "signal/frontier_coverage_15/group_bin_occupancy": 0.8625, "signal/frontier_coverage_15/group_std_mean": 0.17097563147544861, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016700688749551774, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016700688749551774, "signal/frontier_coverage_20/centered_abs_mean": 0.08658400624990463, "signal/frontier_coverage_20/group_bin_occupancy": 0.88125, "signal/frontier_coverage_20/group_std_mean": 0.11109910905361176, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010823000688105822, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010823000688105822, "signal/frontier_coverage_25/centered_abs_mean": 0.05502760782837868, "signal/frontier_coverage_25/group_bin_occupancy": 0.9203125, "signal/frontier_coverage_25/group_std_mean": 0.07039647549390793, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006878450978547335, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006878450978547335, "signal/frontier_coverage_5/centered_abs_mean": 0.15607451200485228, "signal/frontier_coverage_5/group_bin_occupancy": 0.8703125, "signal/frontier_coverage_5/group_std_mean": 0.19986163973808288, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001950931502506137, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001950931502506137, "signal/frontier_ece_reward/centered_abs_mean": 0.003750496730208397, "signal/frontier_ece_reward/group_bin_occupancy": 0.648046875, "signal/frontier_ece_reward/group_std_mean": 0.004760450683534145, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00037504968349821866, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00037504968349821866, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.1986088812351227, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.754296875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.2622865170240402, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01986088827252388, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01986088827252388, "step": 280 }, { "calibration/aurc": 0.34563367565090275, "calibration/batch_distribution_entropy": 0.9775858117370444, "calibration/batch_entropy_100bins": 0.9498917342401729, "calibration/batch_entropy_10bins": 0.9775858117370444, "calibration/batch_entropy_50bins": 0.9729999885268757, "calibration/batch_uniqueness": 0.96455078125, "calibration/buffer_distribution_entropy": 0.998200870549627, "calibration/buffer_entropy_100bins": 0.9762282535895963, "calibration/buffer_entropy_10bins": 0.998200870549627, "calibration/buffer_entropy_50bins": 0.9945279583894413, "calibration/confidence_entropy": 0.5020932663493712, "calibration/coverage@0%": 0.030859375, "calibration/coverage@1%": 0.030859375, "calibration/coverage@10%": 0.0953125, "calibration/coverage@15%": 0.168359375, "calibration/coverage@20%": 0.27109375, "calibration/coverage@25%": 0.35703125, "calibration/coverage@30%": 0.4875, "calibration/coverage@5%": 0.044140625, "calibration/ece": 0.143530546875, "calibration/mean_confidence": 0.478352265625, "calibration/prompt_uniqueness": 0.869384765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 1195.8, "completions/max_terminated_length": 617.6, "completions/mean_length": 228.848828125, "completions/mean_terminated_length": 228.46502075195312, "completions/min_length": 102.6, "completions/min_terminated_length": 102.6, "epoch": 0.912, "grad_norm": 0.000804597744718194, "learning_rate": 1e-06, "loss": 0.0013, "num_tokens": 970417346.0, "reward": 0.945005738735199, "reward_std": 0.07620135098695754, "rewards/accuracy_reward": 0.5462890625, "rewards/brier_reward": 0.7971989035606384, "rewards/confidence_uniqueness_reward": 0.9653137803077698, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.002510549989528954, "rewards/frontier_coverage_0": 0.10179270692169666, "rewards/frontier_coverage_1": 0.10179270692169666, "rewards/frontier_coverage_10": 0.10152241215109825, "rewards/frontier_coverage_15": 0.09234566390514373, "rewards/frontier_coverage_20": 0.06595533415675163, "rewards/frontier_coverage_25": 0.04856384471058846, "rewards/frontier_coverage_5": 0.10167570598423481, "rewards/frontier_ece_reward": 0.002250720001757145, "rewards/frontier_entropy_batch_reward": -0.12107873558998108, "signal/accuracy_reward/centered_abs_mean": 0.08663330078125, "signal/accuracy_reward/group_bin_occupancy": 0.16953125, "signal/accuracy_reward/group_std_mean": 0.11930725127458572, "signal/accuracy_reward/group_zero_std_frac": 0.64375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.043316650390625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.043316650390625, "signal/advantage_abs_mean": 0.057180730253458024, "signal/advantage_pre_scale_abs_mean": 0.057180730253458024, "signal/advantage_pre_scale_std": 0.09748922139406205, "signal/advantage_std": 0.09748922139406205, "signal/brier_reward/centered_abs_mean": 0.12046879529953003, "signal/brier_reward/group_bin_occupancy": 0.85625, "signal/brier_reward/group_std_mean": 0.15507035553455353, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012046879716217518, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012046879716217518, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.013228565640747546, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.86015625, "signal/confidence_uniqueness_reward/group_std_mean": 0.01768874190747738, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001322856592014432, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001322856592014432, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_bin_occupancy": 0.126171875, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.0023816948756575584, "signal/frontier_aurc_reward/group_bin_occupancy": 0.71015625, "signal/frontier_aurc_reward/group_std_mean": 0.004248477658256889, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.9771187109872697e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.9771187109872697e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.1678558111190796, "signal/frontier_coverage_0/group_bin_occupancy": 0.876171875, "signal/frontier_coverage_0/group_std_mean": 0.21537896990776062, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002098197676241398, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002098197676241398, "signal/frontier_coverage_1/centered_abs_mean": 0.1678558111190796, "signal/frontier_coverage_1/group_bin_occupancy": 0.876171875, "signal/frontier_coverage_1/group_std_mean": 0.21537896990776062, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002098197676241398, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002098197676241398, "signal/frontier_coverage_10/centered_abs_mean": 0.16678664982318878, "signal/frontier_coverage_10/group_bin_occupancy": 0.875390625, "signal/frontier_coverage_10/group_std_mean": 0.21400391459465026, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002084833150729537, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002084833150729537, "signal/frontier_coverage_15/centered_abs_mean": 0.14504911601543427, "signal/frontier_coverage_15/group_bin_occupancy": 0.869921875, "signal/frontier_coverage_15/group_std_mean": 0.18602396845817565, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001813114038668573, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001813114038668573, "signal/frontier_coverage_20/centered_abs_mean": 0.0916235864162445, "signal/frontier_coverage_20/group_bin_occupancy": 0.88828125, "signal/frontier_coverage_20/group_std_mean": 0.11773888915777206, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001145294844172895, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001145294844172895, "signal/frontier_coverage_25/centered_abs_mean": 0.058785200119018555, "signal/frontier_coverage_25/group_bin_occupancy": 0.917578125, "signal/frontier_coverage_25/group_std_mean": 0.07563513517379761, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007348150131292642, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007348150131292642, "signal/frontier_coverage_5/centered_abs_mean": 0.16751802563667298, "signal/frontier_coverage_5/group_bin_occupancy": 0.876171875, "signal/frontier_coverage_5/group_std_mean": 0.21494931280612944, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0020939753856509926, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020939753856509926, "signal/frontier_ece_reward/centered_abs_mean": 0.0036217204295098783, "signal/frontier_ece_reward/group_bin_occupancy": 0.645703125, "signal/frontier_ece_reward/group_std_mean": 0.004650117922574281, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0003621720476076007, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0003621720476076007, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.18358459174633027, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.751953125, "signal/frontier_entropy_batch_reward/group_std_mean": 0.24674877524375916, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.018358458951115608, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.018358458951115608, "step": 285 }, { "calibration/aurc": 0.4062298187879727, "calibration/batch_distribution_entropy": 0.9874305729560149, "calibration/batch_entropy_100bins": 0.9521924925781645, "calibration/batch_entropy_10bins": 0.9874305729560149, "calibration/batch_entropy_50bins": 0.9740365739770798, "calibration/batch_uniqueness": 0.96695556640625, "calibration/buffer_distribution_entropy": 0.9983456480552251, "calibration/buffer_entropy_100bins": 0.9763740046184466, "calibration/buffer_entropy_10bins": 0.9983456480552251, "calibration/buffer_entropy_50bins": 0.994612891791807, "calibration/confidence_entropy": 0.5023546384987589, "calibration/coverage@0%": 0.005078125, "calibration/coverage@1%": 0.005078125, "calibration/coverage@10%": 0.010546875, "calibration/coverage@15%": 0.0171875, "calibration/coverage@20%": 0.034765625, "calibration/coverage@25%": 0.05625, "calibration/coverage@30%": 0.170703125, "calibration/coverage@5%": 0.005078125, "calibration/ece": 0.130933474109375, "calibration/mean_confidence": 0.5065108178593749, "calibration/prompt_uniqueness": 0.874267578125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 502.6, "completions/max_terminated_length": 502.6, "completions/mean_length": 219.16572265625, "completions/mean_terminated_length": 219.16572265625, "completions/min_length": 103.0, "completions/min_terminated_length": 103.0, "epoch": 0.928, "grad_norm": 0.000721353106200695, "learning_rate": 1e-06, "loss": -0.0004, "num_tokens": 987688419.0, "reward": 0.932941198348999, "reward_std": 0.07610684931278229, "rewards/accuracy_reward": 0.52978515625, "rewards/brier_reward": 0.7867828845977783, "rewards/confidence_uniqueness_reward": 0.9635459899902343, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.003236295934766531, "rewards/frontier_coverage_0": 0.11244506537914276, "rewards/frontier_coverage_1": 0.11244506537914276, "rewards/frontier_coverage_10": 0.11204758733510971, "rewards/frontier_coverage_15": 0.098624786734581, "rewards/frontier_coverage_20": 0.06998651325702668, "rewards/frontier_coverage_25": 0.05211614817380905, "rewards/frontier_coverage_5": 0.11240084767341614, "rewards/frontier_ece_reward": 0.002500415127724409, "rewards/frontier_entropy_batch_reward": -0.1556967318058014, "signal/accuracy_reward/centered_abs_mean": 0.088250732421875, "signal/accuracy_reward/group_bin_occupancy": 0.16796875, "signal/accuracy_reward/group_std_mean": 0.11783604025840759, "signal/accuracy_reward/group_zero_std_frac": 0.65625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0441253662109375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0441253662109375, "signal/advantage_abs_mean": 0.05913291722536087, "signal/advantage_pre_scale_abs_mean": 0.05913291722536087, "signal/advantage_pre_scale_std": 0.0993342086672783, "signal/advantage_std": 0.0993342086672783, "signal/brier_reward/centered_abs_mean": 0.12455331236124038, "signal/brier_reward/group_bin_occupancy": 0.838671875, "signal/brier_reward/group_std_mean": 0.15881660878658294, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012455331720411777, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012455331720411777, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.014005064964294434, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.863671875, "signal/confidence_uniqueness_reward/group_std_mean": 0.01789715252816677, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0014005065197125078, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014005065197125078, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_bin_occupancy": 0.125, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.003072420973330736, "signal/frontier_aurc_reward/group_bin_occupancy": 0.69375, "signal/frontier_aurc_reward/group_std_mean": 0.005341992899775505, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.8405264785978946e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.8405264785978946e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.16175754070281984, "signal/frontier_coverage_0/group_bin_occupancy": 0.84609375, "signal/frontier_coverage_0/group_std_mean": 0.20798506438732148, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0020219693426042793, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0020219693426042793, "signal/frontier_coverage_1/centered_abs_mean": 0.16175754070281984, "signal/frontier_coverage_1/group_bin_occupancy": 0.84609375, "signal/frontier_coverage_1/group_std_mean": 0.20798506438732148, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0020219693426042793, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020219693426042793, "signal/frontier_coverage_10/centered_abs_mean": 0.1606542646884918, "signal/frontier_coverage_10/group_bin_occupancy": 0.845703125, "signal/frontier_coverage_10/group_std_mean": 0.20660010874271392, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0020081782713532448, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020081782713532448, "signal/frontier_coverage_15/centered_abs_mean": 0.138150891661644, "signal/frontier_coverage_15/group_bin_occupancy": 0.83984375, "signal/frontier_coverage_15/group_std_mean": 0.178511181473732, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0017268861876800657, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0017268861876800657, "signal/frontier_coverage_20/centered_abs_mean": 0.08926409333944321, "signal/frontier_coverage_20/group_bin_occupancy": 0.863671875, "signal/frontier_coverage_20/group_std_mean": 0.11590456813573838, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0011158011853694915, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011158011853694915, "signal/frontier_coverage_25/centered_abs_mean": 0.06056609675288201, "signal/frontier_coverage_25/group_bin_occupancy": 0.921875, "signal/frontier_coverage_25/group_std_mean": 0.07759960442781448, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.000757076172158122, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.000757076172158122, "signal/frontier_coverage_5/centered_abs_mean": 0.16137201189994813, "signal/frontier_coverage_5/group_bin_occupancy": 0.845703125, "signal/frontier_coverage_5/group_std_mean": 0.2075114369392395, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0020171501440927387, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020171501440927387, "signal/frontier_ece_reward/centered_abs_mean": 0.003959872899577022, "signal/frontier_ece_reward/group_bin_occupancy": 0.637109375, "signal/frontier_ece_reward/group_std_mean": 0.0050338350236415865, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0003959872992709279, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0003959872992709279, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.20363571047782897, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.778125, "signal/frontier_entropy_batch_reward/group_std_mean": 0.26806468367576597, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.020363571867346764, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.020363571867346764, "step": 290 }, { "calibration/aurc": 0.2766497709707625, "calibration/batch_distribution_entropy": 0.9876647648242093, "calibration/batch_entropy_100bins": 0.9541677353832169, "calibration/batch_entropy_10bins": 0.9876647648242093, "calibration/batch_entropy_50bins": 0.9780485077011252, "calibration/batch_uniqueness": 0.9672607421875, "calibration/buffer_distribution_entropy": 0.9984667280203248, "calibration/buffer_entropy_100bins": 0.9765595176283114, "calibration/buffer_entropy_10bins": 0.9984667280203248, "calibration/buffer_entropy_50bins": 0.9946736777858677, "calibration/confidence_entropy": 0.5094665348781423, "calibration/coverage@0%": 0.04140625, "calibration/coverage@1%": 0.04140625, "calibration/coverage@10%": 0.1859375, "calibration/coverage@15%": 0.258984375, "calibration/coverage@20%": 0.34375, "calibration/coverage@25%": 0.4078125, "calibration/coverage@30%": 0.534765625, "calibration/coverage@5%": 0.0640625, "calibration/ece": 0.10064012623476563, "calibration/mean_confidence": 0.4969223737652344, "calibration/prompt_uniqueness": 0.871875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 544.2, "completions/max_terminated_length": 544.2, "completions/mean_length": 222.33515625, "completions/mean_terminated_length": 222.33515625, "completions/min_length": 100.0, "completions/min_terminated_length": 100.0, "epoch": 0.944, "grad_norm": 0.0009958718437701464, "learning_rate": 1e-06, "loss": 0.0012, "num_tokens": 1004940555.0, "reward": 0.9349647402763367, "reward_std": 0.08481650799512863, "rewards/accuracy_reward": 0.52861328125, "rewards/brier_reward": 0.7945270657539367, "rewards/confidence_uniqueness_reward": 0.9638397216796875, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.0029068424366414545, "rewards/frontier_coverage_0": 0.11733992844820022, "rewards/frontier_coverage_1": 0.11733992844820022, "rewards/frontier_coverage_10": 0.11630354076623917, "rewards/frontier_coverage_15": 0.10327569544315338, "rewards/frontier_coverage_20": 0.07216155454516411, "rewards/frontier_coverage_25": 0.0491545557975769, "rewards/frontier_coverage_5": 0.11695939749479294, "rewards/frontier_ece_reward": 0.0026014718692749738, "rewards/frontier_entropy_batch_reward": -0.14059094190597535, "signal/accuracy_reward/centered_abs_mean": 0.110321044921875, "signal/accuracy_reward/group_bin_occupancy": 0.17421875, "signal/accuracy_reward/group_std_mean": 0.14301176518201827, "signal/accuracy_reward/group_zero_std_frac": 0.60625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0551605224609375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0551605224609375, "signal/advantage_abs_mean": 0.06603206172585488, "signal/advantage_pre_scale_abs_mean": 0.06603206172585488, "signal/advantage_pre_scale_std": 0.10855630040168762, "signal/advantage_std": 0.10855630040168762, "signal/brier_reward/centered_abs_mean": 0.11559386998414993, "signal/brier_reward/group_bin_occupancy": 0.85625, "signal/brier_reward/group_std_mean": 0.14800142645835876, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011559387482702733, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011559387482702733, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.014104413986206054, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.852734375, "signal/confidence_uniqueness_reward/group_std_mean": 0.017873943597078324, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0014104413567110896, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014104413567110896, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_bin_occupancy": 0.125, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0025243773590773342, "signal/frontier_aurc_reward/group_bin_occupancy": 0.725, "signal/frontier_aurc_reward/group_std_mean": 0.004259524215012789, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.155471749778371e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.155471749778371e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.16891084015369415, "signal/frontier_coverage_0/group_bin_occupancy": 0.870703125, "signal/frontier_coverage_0/group_std_mean": 0.2158743679523468, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0021113855065777896, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021113855065777896, "signal/frontier_coverage_1/centered_abs_mean": 0.16891084015369415, "signal/frontier_coverage_1/group_bin_occupancy": 0.870703125, "signal/frontier_coverage_1/group_std_mean": 0.2158743679523468, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0021113855065777896, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021113855065777896, "signal/frontier_coverage_10/centered_abs_mean": 0.16640540361404418, "signal/frontier_coverage_10/group_bin_occupancy": 0.869140625, "signal/frontier_coverage_10/group_std_mean": 0.2127244621515274, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002080067666247487, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002080067666247487, "signal/frontier_coverage_15/centered_abs_mean": 0.14721881449222565, "signal/frontier_coverage_15/group_bin_occupancy": 0.865625, "signal/frontier_coverage_15/group_std_mean": 0.18873787820339202, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0018402352230623364, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018402352230623364, "signal/frontier_coverage_20/centered_abs_mean": 0.09060794413089752, "signal/frontier_coverage_20/group_bin_occupancy": 0.878125, "signal/frontier_coverage_20/group_std_mean": 0.11698480546474457, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001132599334232509, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001132599334232509, "signal/frontier_coverage_25/centered_abs_mean": 0.05698830112814903, "signal/frontier_coverage_25/group_bin_occupancy": 0.9109375, "signal/frontier_coverage_25/group_std_mean": 0.07349057048559189, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007123537710867822, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007123537710867822, "signal/frontier_coverage_5/centered_abs_mean": 0.1684437781572342, "signal/frontier_coverage_5/group_bin_occupancy": 0.870703125, "signal/frontier_coverage_5/group_std_mean": 0.2152959108352661, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0021055472549051046, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021055472549051046, "signal/frontier_ece_reward/centered_abs_mean": 0.00390915535390377, "signal/frontier_ece_reward/group_bin_occupancy": 0.6515625, "signal/frontier_ece_reward/group_std_mean": 0.004986092075705528, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00039091553771868347, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00039091553771868347, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.19727673530578613, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.76484375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.25940607488155365, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.019727673567831516, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.019727673567831516, "step": 295 }, { "calibration/aurc": 0.32868832388728325, "calibration/batch_distribution_entropy": 0.9856009062503024, "calibration/batch_entropy_100bins": 0.9540635512806951, "calibration/batch_entropy_10bins": 0.9856009062503024, "calibration/batch_entropy_50bins": 0.975213897153728, "calibration/batch_uniqueness": 0.9663665771484375, "calibration/buffer_distribution_entropy": 0.9985059192901078, "calibration/buffer_entropy_100bins": 0.9767553596300397, "calibration/buffer_entropy_10bins": 0.9985059192901078, "calibration/buffer_entropy_50bins": 0.9946862407467618, "calibration/confidence_entropy": 0.4862125053483779, "calibration/coverage@0%": 0.005859375, "calibration/coverage@1%": 0.005859375, "calibration/coverage@10%": 0.096875, "calibration/coverage@15%": 0.209765625, "calibration/coverage@20%": 0.28046875, "calibration/coverage@25%": 0.34609375, "calibration/coverage@30%": 0.404296875, "calibration/coverage@5%": 0.07421875, "calibration/ece": 0.14568909689453127, "calibration/mean_confidence": 0.5053752603015624, "calibration/prompt_uniqueness": 0.86279296875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 696.8, "completions/max_terminated_length": 507.8, "completions/mean_length": 222.09775390625, "completions/mean_terminated_length": 221.96982421875, "completions/min_length": 102.6, "completions/min_terminated_length": 102.6, "epoch": 0.96, "grad_norm": 0.000792116392403841, "learning_rate": 1e-06, "loss": 0.0005, "num_tokens": 1022155156.0, "reward": 0.9359057307243347, "reward_std": 0.06655998975038528, "rewards/accuracy_reward": 0.52568359375, "rewards/brier_reward": 0.8039435505867004, "rewards/confidence_uniqueness_reward": 0.9652412414550782, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.0029766473453491926, "rewards/frontier_coverage_0": 0.11361845880746842, "rewards/frontier_coverage_1": 0.11361845880746842, "rewards/frontier_coverage_10": 0.1131316065788269, "rewards/frontier_coverage_15": 0.10452397763729096, "rewards/frontier_coverage_20": 0.0738675132393837, "rewards/frontier_coverage_25": 0.05374932512640953, "rewards/frontier_coverage_5": 0.11348761469125748, "rewards/frontier_ece_reward": 0.0027279237285256384, "rewards/frontier_entropy_batch_reward": -0.12616288512945176, "signal/accuracy_reward/centered_abs_mean": 0.070672607421875, "signal/accuracy_reward/group_bin_occupancy": 0.1640625, "signal/accuracy_reward/group_std_mean": 0.09988184720277786, "signal/accuracy_reward/group_zero_std_frac": 0.6875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0353363037109375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0353363037109375, "signal/advantage_abs_mean": 0.0491713747382164, "signal/advantage_pre_scale_abs_mean": 0.0491713747382164, "signal/advantage_pre_scale_std": 0.08726191073656082, "signal/advantage_std": 0.08726191073656082, "signal/brier_reward/centered_abs_mean": 0.10785721391439437, "signal/brier_reward/group_bin_occupancy": 0.834765625, "signal/brier_reward/group_std_mean": 0.140894290804863, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01078572142869234, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01078572142869234, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.013079667091369629, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.851171875, "signal/confidence_uniqueness_reward/group_std_mean": 0.01671627685427666, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013079666998237372, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013079666998237372, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_bin_occupancy": 0.125390625, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.002698933309875429, "signal/frontier_aurc_reward/group_bin_occupancy": 0.691015625, "signal/frontier_aurc_reward/group_std_mean": 0.0046086800284683704, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.3736664772732186e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.3736664772732186e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.14771159887313842, "signal/frontier_coverage_0/group_bin_occupancy": 0.86015625, "signal/frontier_coverage_0/group_std_mean": 0.18986626863479614, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001846395037136972, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001846395037136972, "signal/frontier_coverage_1/centered_abs_mean": 0.14771159887313842, "signal/frontier_coverage_1/group_bin_occupancy": 0.86015625, "signal/frontier_coverage_1/group_std_mean": 0.18986626863479614, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001846395037136972, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001846395037136972, "signal/frontier_coverage_10/centered_abs_mean": 0.14660803675651551, "signal/frontier_coverage_10/group_bin_occupancy": 0.85859375, "signal/frontier_coverage_10/group_std_mean": 0.18843259811401367, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018326004967093468, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018326004967093468, "signal/frontier_coverage_15/centered_abs_mean": 0.13180427700281144, "signal/frontier_coverage_15/group_bin_occupancy": 0.853515625, "signal/frontier_coverage_15/group_std_mean": 0.1698448807001114, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016475534765049815, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016475534765049815, "signal/frontier_coverage_20/centered_abs_mean": 0.08136253356933594, "signal/frontier_coverage_20/group_bin_occupancy": 0.886328125, "signal/frontier_coverage_20/group_std_mean": 0.10504998713731765, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010170316556468606, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010170316556468606, "signal/frontier_coverage_25/centered_abs_mean": 0.056659433990716934, "signal/frontier_coverage_25/group_bin_occupancy": 0.93359375, "signal/frontier_coverage_25/group_std_mean": 0.07215163707733155, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.000708242948167026, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.000708242948167026, "signal/frontier_coverage_5/centered_abs_mean": 0.14725472331047057, "signal/frontier_coverage_5/group_bin_occupancy": 0.859375, "signal/frontier_coverage_5/group_std_mean": 0.18927786350250245, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018406840972602367, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018406840972602367, "signal/frontier_ece_reward/centered_abs_mean": 0.003890192415565252, "signal/frontier_ece_reward/group_bin_occupancy": 0.630859375, "signal/frontier_ece_reward/group_std_mean": 0.004982131253927946, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00038901924854144453, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00038901924854144453, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.1808852344751358, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.755859375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.24199664890766143, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01808852329850197, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01808852329850197, "step": 300 }, { "epoch": 0.96, "eval_calibration/aurc": 0.4616949388658732, "eval_calibration/batch_distribution_entropy": 0.927457138975722, "eval_calibration/batch_entropy_100bins": 0.7084673039767129, "eval_calibration/batch_entropy_10bins": 0.927457138975722, "eval_calibration/batch_entropy_50bins": 0.7931473827535757, "eval_calibration/batch_uniqueness": 0.9091796875, "eval_calibration/buffer_distribution_entropy": 0.99835496744854, "eval_calibration/buffer_entropy_100bins": 0.9767894525028088, "eval_calibration/buffer_entropy_10bins": 0.99835496744854, "eval_calibration/buffer_entropy_50bins": 0.9945313200798988, "eval_calibration/confidence_entropy": 0.49295569737742106, "eval_calibration/coverage@0%": 0.0546875, "eval_calibration/coverage@1%": 0.0546875, "eval_calibration/coverage@10%": 0.0546875, "eval_calibration/coverage@15%": 0.0546875, "eval_calibration/coverage@20%": 0.0625, "eval_calibration/coverage@25%": 0.15625, "eval_calibration/coverage@30%": 0.3125, "eval_calibration/coverage@5%": 0.0546875, "eval_calibration/ece": 0.19367187500000002, "eval_calibration/mean_confidence": 0.4317968750000001, "eval_calibration/prompt_uniqueness": 0.9091796875, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 380.5, "eval_completions/max_terminated_length": 380.5, "eval_completions/mean_length": 222.7589569091797, "eval_completions/mean_terminated_length": 222.7589569091797, "eval_completions/min_length": 124.0, "eval_completions/min_terminated_length": 124.0, "eval_loss": 0.0, "eval_num_tokens": 1022155156.0, "eval_reward": 0.8408344089984894, "eval_reward_std": 0.23095110431313515, "eval_rewards/accuracy_reward": 0.431640625, "eval_rewards/brier_reward": 0.8074806481599808, "eval_rewards/confidence_uniqueness_reward": 0.9091796875, "eval_rewards/format_reward": 1.0, "eval_rewards/frontier_aurc_reward": -0.0033944263705052435, "eval_rewards/frontier_coverage_0": 0.1909365188330412, "eval_rewards/frontier_coverage_1": 0.1909365188330412, "eval_rewards/frontier_coverage_10": 0.18911465257406235, "eval_rewards/frontier_coverage_15": 0.1690607387572527, "eval_rewards/frontier_coverage_20": 0.10447167791426182, "eval_rewards/frontier_coverage_25": 0.05829550698399544, "eval_rewards/frontier_coverage_5": 0.1909365188330412, "eval_rewards/frontier_ece_reward": 0.003318383765872568, "eval_rewards/frontier_entropy_batch_reward": -0.6061325073242188, "eval_runtime": 20.378, "eval_samples_per_second": 24.536, "eval_signal/accuracy_reward/centered_abs_mean": 0.4720458984375, "eval_signal/accuracy_reward/group_bin_occupancy": 0.25, "eval_signal/accuracy_reward/group_std_mean": 0.493147149682045, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23602294921875, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23602294921875, "eval_signal/advantage_abs_mean": 0.21535654366016388, "eval_signal/advantage_pre_scale_abs_mean": 0.21535654366016388, "eval_signal/advantage_pre_scale_std": 0.2285638451576233, "eval_signal/advantage_std": 0.2285638451576233, "eval_signal/brier_reward/centered_abs_mean": 0.17071156948804855, "eval_signal/brier_reward/group_bin_occupancy": 0.9140625, "eval_signal/brier_reward/group_std_mean": 0.21894007921218872, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017071157693862915, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.017071157693862915, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0352630615234375, "eval_signal/confidence_uniqueness_reward/group_bin_occupancy": 0.3359375, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.04285713844001293, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0035263061290606856, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0035263061290606856, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_bin_occupancy": 0.125, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0043263022089377046, "eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.6640625, "eval_signal/frontier_aurc_reward/group_std_mean": 0.008629275194834918, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.4078777338872897e-05, "eval_signal/frontier_aurc_reward/weight": 0.012500000186264515, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.4078777338872897e-05, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.3566969484090805, "eval_signal/frontier_coverage_0/group_bin_occupancy": 0.953125, "eval_signal/frontier_coverage_0/group_std_mean": 0.42929134517908096, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.004458711948245764, "eval_signal/frontier_coverage_0/weight": 0.012500000186264515, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004458711948245764, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.3566969484090805, "eval_signal/frontier_coverage_1/group_bin_occupancy": 0.953125, "eval_signal/frontier_coverage_1/group_std_mean": 0.42929134517908096, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004458711948245764, "eval_signal/frontier_coverage_1/weight": 0.012500000186264515, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004458711948245764, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.352319672703743, "eval_signal/frontier_coverage_10/group_bin_occupancy": 0.953125, "eval_signal/frontier_coverage_10/group_std_mean": 0.4243213012814522, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004403996164910495, "eval_signal/frontier_coverage_10/weight": 0.012500000186264515, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004403996164910495, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.3145061433315277, "eval_signal/frontier_coverage_15/group_bin_occupancy": 0.9609375, "eval_signal/frontier_coverage_15/group_std_mean": 0.38166315108537674, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003931326966267079, "eval_signal/frontier_coverage_15/weight": 0.012500000186264515, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003931326966267079, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.17512128874659538, "eval_signal/frontier_coverage_20/group_bin_occupancy": 0.90625, "eval_signal/frontier_coverage_20/group_std_mean": 0.22007206827402115, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002189016144257039, "eval_signal/frontier_coverage_20/weight": 0.012500000186264515, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002189016144257039, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.0862487182021141, "eval_signal/frontier_coverage_25/group_bin_occupancy": 0.9296875, "eval_signal/frontier_coverage_25/group_std_mean": 0.10976832546293736, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010781089658848941, "eval_signal/frontier_coverage_25/weight": 0.012500000186264515, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010781089658848941, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.3566969484090805, "eval_signal/frontier_coverage_5/group_bin_occupancy": 0.953125, "eval_signal/frontier_coverage_5/group_std_mean": 0.42929134517908096, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004458711948245764, "eval_signal/frontier_coverage_5/weight": 0.012500000186264515, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004458711948245764, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.005990799865685403, "eval_signal/frontier_ece_reward/group_bin_occupancy": 0.8984375, "eval_signal/frontier_ece_reward/group_std_mean": 0.007247602799907327, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005990800127619877, "eval_signal/frontier_ece_reward/weight": 0.10000000149011612, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005990800127619877, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32252073287963867, "eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.25, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.33716534078121185, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03225207328796387, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03225207328796387, "eval_steps_per_second": 0.196, "step": 300 }, { "calibration/aurc": 0.24985993469125098, "calibration/batch_distribution_entropy": 0.9709909089719659, "calibration/batch_entropy_100bins": 0.9447107719359022, "calibration/batch_entropy_10bins": 0.9709909089719659, "calibration/batch_entropy_50bins": 0.9677236983664186, "calibration/batch_uniqueness": 0.9638519287109375, "calibration/buffer_distribution_entropy": 0.9983593120363045, "calibration/buffer_entropy_100bins": 0.9769284851620673, "calibration/buffer_entropy_10bins": 0.9983593120363045, "calibration/buffer_entropy_50bins": 0.9945877722075391, "calibration/confidence_entropy": 0.5034992213269515, "calibration/coverage@0%": 0.03984375, "calibration/coverage@1%": 0.03984375, "calibration/coverage@10%": 0.28359375, "calibration/coverage@15%": 0.384765625, "calibration/coverage@20%": 0.46328125, "calibration/coverage@25%": 0.54296875, "calibration/coverage@30%": 0.58984375, "calibration/coverage@5%": 0.084765625, "calibration/ece": 0.14316060628906252, "calibration/mean_confidence": 0.48792689371093745, "calibration/prompt_uniqueness": 0.865625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 720.2, "completions/max_terminated_length": 522.8, "completions/mean_length": 223.82119140625, "completions/mean_terminated_length": 223.6925476074219, "completions/min_length": 104.6, "completions/min_terminated_length": 104.6, "epoch": 0.976, "grad_norm": 0.0008361483342014253, "learning_rate": 1e-06, "loss": 0.0007, "num_tokens": 1039308205.0, "reward": 0.9485756874084472, "reward_std": 0.07829709947109223, "rewards/accuracy_reward": 0.5544921875, "rewards/brier_reward": 0.7974941968917847, "rewards/confidence_uniqueness_reward": 0.9653183341026306, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.002789009315893054, "rewards/frontier_coverage_0": 0.10211436282843352, "rewards/frontier_coverage_1": 0.10211436282843352, "rewards/frontier_coverage_10": 0.10185855161398649, "rewards/frontier_coverage_15": 0.09434006288647652, "rewards/frontier_coverage_20": 0.06686797887086868, "rewards/frontier_coverage_25": 0.04957782253623009, "rewards/frontier_coverage_5": 0.10211436282843352, "rewards/frontier_ece_reward": 0.002479456667788327, "rewards/frontier_entropy_batch_reward": -0.12853255420923232, "signal/accuracy_reward/centered_abs_mean": 0.09609375, "signal/accuracy_reward/group_bin_occupancy": 0.17265625, "signal/accuracy_reward/group_std_mean": 0.13040682971477507, "signal/accuracy_reward/group_zero_std_frac": 0.61875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.048046875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.048046875, "signal/advantage_abs_mean": 0.058770731836557386, "signal/advantage_pre_scale_abs_mean": 0.058770731836557386, "signal/advantage_pre_scale_std": 0.10008790940046311, "signal/advantage_std": 0.10008790940046311, "signal/brier_reward/centered_abs_mean": 0.11222728043794632, "signal/brier_reward/group_bin_occupancy": 0.848828125, "signal/brier_reward/group_std_mean": 0.14573695361614228, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011222727596759796, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011222727596759796, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.01253490149974823, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.882421875, "signal/confidence_uniqueness_reward/group_std_mean": 0.015887865237891673, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001253490149974823, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001253490149974823, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_bin_occupancy": 0.125390625, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.0024836147669702767, "signal/frontier_aurc_reward/group_bin_occupancy": 0.7125, "signal/frontier_aurc_reward/group_std_mean": 0.00410917429253459, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.1045184732647614e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.1045184732647614e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.1621830016374588, "signal/frontier_coverage_0/group_bin_occupancy": 0.869140625, "signal/frontier_coverage_0/group_std_mean": 0.20892693996429443, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002027287520468235, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002027287520468235, "signal/frontier_coverage_1/centered_abs_mean": 0.1621830016374588, "signal/frontier_coverage_1/group_bin_occupancy": 0.869140625, "signal/frontier_coverage_1/group_std_mean": 0.20892693996429443, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002027287520468235, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002027287520468235, "signal/frontier_coverage_10/centered_abs_mean": 0.16161874830722808, "signal/frontier_coverage_10/group_bin_occupancy": 0.86875, "signal/frontier_coverage_10/group_std_mean": 0.2081727385520935, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0020202343817800283, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020202343817800283, "signal/frontier_coverage_15/centered_abs_mean": 0.14477128386497498, "signal/frontier_coverage_15/group_bin_occupancy": 0.862109375, "signal/frontier_coverage_15/group_std_mean": 0.18677389919757842, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001809641090221703, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001809641090221703, "signal/frontier_coverage_20/centered_abs_mean": 0.08578696697950364, "signal/frontier_coverage_20/group_bin_occupancy": 0.8890625, "signal/frontier_coverage_20/group_std_mean": 0.11098419278860092, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010723370942287147, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010723370942287147, "signal/frontier_coverage_25/centered_abs_mean": 0.0560153141617775, "signal/frontier_coverage_25/group_bin_occupancy": 0.934375, "signal/frontier_coverage_25/group_std_mean": 0.07195385619997978, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007001914316788315, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007001914316788315, "signal/frontier_coverage_5/centered_abs_mean": 0.1621830016374588, "signal/frontier_coverage_5/group_bin_occupancy": 0.869140625, "signal/frontier_coverage_5/group_std_mean": 0.20892693996429443, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002027287520468235, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002027287520468235, "signal/frontier_ece_reward/centered_abs_mean": 0.0039973936509341, "signal/frontier_ece_reward/group_bin_occupancy": 0.63984375, "signal/frontier_ece_reward/group_std_mean": 0.005053135752677918, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0003997393825557083, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0003997393825557083, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.1855588138103485, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.753125, "signal/frontier_entropy_batch_reward/group_std_mean": 0.2466350704431534, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01855588089674711, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01855588089674711, "step": 305 }, { "calibration/aurc": 0.3487669014807945, "calibration/batch_distribution_entropy": 0.9754493435740519, "calibration/batch_entropy_100bins": 0.952291511351636, "calibration/batch_entropy_10bins": 0.9754493435740519, "calibration/batch_entropy_50bins": 0.9740999166880734, "calibration/batch_uniqueness": 0.9618896484375, "calibration/buffer_distribution_entropy": 0.9983498943855491, "calibration/buffer_entropy_100bins": 0.9769291495405834, "calibration/buffer_entropy_10bins": 0.9983498943855491, "calibration/buffer_entropy_50bins": 0.9945590427113518, "calibration/confidence_entropy": 0.48104339531250256, "calibration/coverage@0%": 0.015625, "calibration/coverage@1%": 0.015625, "calibration/coverage@10%": 0.083203125, "calibration/coverage@15%": 0.112890625, "calibration/coverage@20%": 0.18828125, "calibration/coverage@25%": 0.253125, "calibration/coverage@30%": 0.4546875, "calibration/coverage@5%": 0.031640625, "calibration/ece": 0.13703221653203126, "calibration/mean_confidence": 0.4620144665812499, "calibration/prompt_uniqueness": 0.856494140625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 697.8, "completions/max_terminated_length": 486.6, "completions/mean_length": 226.31220703125, "completions/mean_terminated_length": 226.1843505859375, "completions/min_length": 101.6, "completions/min_terminated_length": 101.6, "epoch": 0.992, "grad_norm": 0.001037325244396925, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 1056754122.0, "reward": 0.9321189403533936, "reward_std": 0.07475720196962357, "rewards/accuracy_reward": 0.5267578125, "rewards/brier_reward": 0.7943168759346009, "rewards/confidence_uniqueness_reward": 0.9623941659927369, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.002925048628821969, "rewards/frontier_coverage_0": 0.11955789923667907, "rewards/frontier_coverage_1": 0.11955789923667907, "rewards/frontier_coverage_10": 0.11875800639390946, "rewards/frontier_coverage_15": 0.1080111250281334, "rewards/frontier_coverage_20": 0.07299467772245408, "rewards/frontier_coverage_25": 0.053522860258817674, "rewards/frontier_coverage_5": 0.11955789923667907, "rewards/frontier_ece_reward": 0.0025301900692284106, "rewards/frontier_entropy_batch_reward": -0.1599818915128708, "signal/accuracy_reward/centered_abs_mean": 0.0859619140625, "signal/accuracy_reward/group_bin_occupancy": 0.169140625, "signal/accuracy_reward/group_std_mean": 0.11736536026000977, "signal/accuracy_reward/group_zero_std_frac": 0.646875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04298095703125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04298095703125, "signal/advantage_abs_mean": 0.05674861744046211, "signal/advantage_pre_scale_abs_mean": 0.05674861744046211, "signal/advantage_pre_scale_std": 0.09784113019704818, "signal/advantage_std": 0.09784113019704818, "signal/brier_reward/centered_abs_mean": 0.1121656432747841, "signal/brier_reward/group_bin_occupancy": 0.848046875, "signal/brier_reward/group_std_mean": 0.14315251410007476, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011216564849019051, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011216564849019051, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.01467701867222786, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.837890625, "signal/confidence_uniqueness_reward/group_std_mean": 0.019009753316640853, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0014677019091323019, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014677019091323019, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_bin_occupancy": 0.125390625, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.002658972842618823, "signal/frontier_aurc_reward/group_bin_occupancy": 0.715625, "signal/frontier_aurc_reward/group_std_mean": 0.004619904328137636, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.323716045997571e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.323716045997571e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.15493883788585663, "signal/frontier_coverage_0/group_bin_occupancy": 0.8703125, "signal/frontier_coverage_0/group_std_mean": 0.19741571545600892, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0019367355620488525, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019367355620488525, "signal/frontier_coverage_1/centered_abs_mean": 0.15493883788585663, "signal/frontier_coverage_1/group_bin_occupancy": 0.8703125, "signal/frontier_coverage_1/group_std_mean": 0.19741571545600892, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019367355620488525, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019367355620488525, "signal/frontier_coverage_10/centered_abs_mean": 0.15374906808137895, "signal/frontier_coverage_10/group_bin_occupancy": 0.8703125, "signal/frontier_coverage_10/group_std_mean": 0.19593823254108428, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019218633184209465, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019218633184209465, "signal/frontier_coverage_15/centered_abs_mean": 0.13852892816066742, "signal/frontier_coverage_15/group_bin_occupancy": 0.86328125, "signal/frontier_coverage_15/group_std_mean": 0.1765392690896988, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0017316116951406001, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0017316116951406001, "signal/frontier_coverage_20/centered_abs_mean": 0.07982205301523208, "signal/frontier_coverage_20/group_bin_occupancy": 0.889453125, "signal/frontier_coverage_20/group_std_mean": 0.10253897607326508, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009977756650187074, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009977756650187074, "signal/frontier_coverage_25/centered_abs_mean": 0.05590454265475273, "signal/frontier_coverage_25/group_bin_occupancy": 0.92109375, "signal/frontier_coverage_25/group_std_mean": 0.07175193578004838, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006988067994825542, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006988067994825542, "signal/frontier_coverage_5/centered_abs_mean": 0.15493883788585663, "signal/frontier_coverage_5/group_bin_occupancy": 0.8703125, "signal/frontier_coverage_5/group_std_mean": 0.19741571545600892, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0019367355620488525, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019367355620488525, "signal/frontier_ece_reward/centered_abs_mean": 0.0038536326494067906, "signal/frontier_ece_reward/group_bin_occupancy": 0.637109375, "signal/frontier_ece_reward/group_std_mean": 0.0048526331782341005, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00038536327192559836, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00038536327192559836, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.20577935874462128, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.742578125, "signal/frontier_entropy_batch_reward/group_std_mean": 0.26543656289577483, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.020577935874462126, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.020577935874462126, "step": 310 }, { "calibration/aurc": 0.28304180812555446, "calibration/batch_distribution_entropy": 0.9570053825772713, "calibration/batch_entropy_100bins": 0.9338788681757995, "calibration/batch_entropy_10bins": 0.9570053825772713, "calibration/batch_entropy_50bins": 0.9562927539554573, "calibration/batch_uniqueness": 0.9649124145507812, "calibration/buffer_distribution_entropy": 0.9983675863194514, "calibration/buffer_entropy_100bins": 0.9769959361817846, "calibration/buffer_entropy_10bins": 0.9983675863194514, "calibration/buffer_entropy_50bins": 0.9945730040176226, "calibration/confidence_entropy": 0.48210036103995263, "calibration/coverage@0%": 0.0078125, "calibration/coverage@1%": 0.0078125, "calibration/coverage@10%": 0.0419921875, "calibration/coverage@15%": 0.083984375, "calibration/coverage@20%": 0.1103515625, "calibration/coverage@25%": 0.552734375, "calibration/coverage@30%": 0.6474609375, "calibration/coverage@5%": 0.0078125, "calibration/ece": 0.17189453124999998, "calibration/mean_confidence": 0.5929296875000001, "calibration/prompt_uniqueness": 0.853271484375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 517.5, "completions/max_terminated_length": 517.5, "completions/mean_length": 223.3858871459961, "completions/mean_terminated_length": 223.3858871459961, "completions/min_length": 98.5, "completions/min_terminated_length": 98.5, "epoch": 0.9984, "num_tokens": 1063675115.0, "reward": 0.9411610662937164, "reward_std": 0.07683784514665604, "rewards/accuracy_reward": 0.554443359375, "rewards/brier_reward": 0.7697184383869171, "rewards/confidence_uniqueness_reward": 0.9654731750488281, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.003481648047454655, "rewards/frontier_coverage_0": 0.06845960207283497, "rewards/frontier_coverage_1": 0.06845960207283497, "rewards/frontier_coverage_10": 0.06917408481240273, "rewards/frontier_coverage_15": 0.059449709951877594, "rewards/frontier_coverage_20": 0.04144248925149441, "rewards/frontier_coverage_25": 0.03584153205156326, "rewards/frontier_coverage_5": 0.06845960207283497, "rewards/frontier_ece_reward": 0.0017672271933406591, "rewards/frontier_entropy_batch_reward": -0.14854049682617188, "signal/accuracy_reward/centered_abs_mean": 0.0879669189453125, "signal/accuracy_reward/group_bin_occupancy": 0.1669921875, "signal/accuracy_reward/group_std_mean": 0.11592860147356987, "signal/accuracy_reward/group_zero_std_frac": 0.6640625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04398345947265625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04398345947265625, "signal/advantage_abs_mean": 0.060594651848077774, "signal/advantage_pre_scale_abs_mean": 0.060594651848077774, "signal/advantage_pre_scale_std": 0.10347720980644226, "signal/advantage_std": 0.10347720980644226, "signal/brier_reward/centered_abs_mean": 0.11784609407186508, "signal/brier_reward/group_bin_occupancy": 0.8671875, "signal/brier_reward/group_std_mean": 0.15063194930553436, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011784609407186508, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011784609407186508, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.013161659240722656, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.849609375, "signal/confidence_uniqueness_reward/group_std_mean": 0.016746241133660078, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013161659589968622, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013161659589968622, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_bin_occupancy": 0.125, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0032848347909748554, "signal/frontier_aurc_reward/group_bin_occupancy": 0.69921875, "signal/frontier_aurc_reward/group_std_mean": 0.005517321405932307, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.106043343199417e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.106043343199417e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.14311717450618744, "signal/frontier_coverage_0/group_bin_occupancy": 0.876953125, "signal/frontier_coverage_0/group_std_mean": 0.18515148013830185, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0017889646114781499, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0017889646114781499, "signal/frontier_coverage_1/centered_abs_mean": 0.14311717450618744, "signal/frontier_coverage_1/group_bin_occupancy": 0.876953125, "signal/frontier_coverage_1/group_std_mean": 0.18515148013830185, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0017889646114781499, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0017889646114781499, "signal/frontier_coverage_10/centered_abs_mean": 0.14185547828674316, "signal/frontier_coverage_10/group_bin_occupancy": 0.876953125, "signal/frontier_coverage_10/group_std_mean": 0.18353784829378128, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0017731935367919505, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017731935367919505, "signal/frontier_coverage_15/centered_abs_mean": 0.1278558410704136, "signal/frontier_coverage_15/group_bin_occupancy": 0.87109375, "signal/frontier_coverage_15/group_std_mean": 0.16538064926862717, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001598198083229363, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001598198083229363, "signal/frontier_coverage_20/centered_abs_mean": 0.070084098726511, "signal/frontier_coverage_20/group_bin_occupancy": 0.884765625, "signal/frontier_coverage_20/group_std_mean": 0.09156738221645355, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008760512573644519, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008760512573644519, "signal/frontier_coverage_25/centered_abs_mean": 0.051280662417411804, "signal/frontier_coverage_25/group_bin_occupancy": 0.908203125, "signal/frontier_coverage_25/group_std_mean": 0.06649521738290787, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006410082860384136, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006410082860384136, "signal/frontier_coverage_5/centered_abs_mean": 0.14311717450618744, "signal/frontier_coverage_5/group_bin_occupancy": 0.876953125, "signal/frontier_coverage_5/group_std_mean": 0.18515148013830185, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0017889646114781499, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0017889646114781499, "signal/frontier_ece_reward/centered_abs_mean": 0.004005152499303222, "signal/frontier_ece_reward/group_bin_occupancy": 0.6640625, "signal/frontier_ece_reward/group_std_mean": 0.00511455861851573, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0004005152586614713, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0004005152586614713, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.19959602504968643, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.763671875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.2626366764307022, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.019959602504968643, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.019959602504968643, "step": 312, "total_flos": 0.0, "train_loss": 0.0045177273673172565, "train_runtime": 60351.758, "train_samples_per_second": 0.331, "train_steps_per_second": 0.005 } ], "logging_steps": 5, "max_steps": 312, "num_input_tokens_seen": 1063675115, "num_train_epochs": 1, "save_steps": 60, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }