{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9984, "eval_steps": 50, "global_step": 312, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "calibration/aurc": 0.6358064756244601, "calibration/batch_distribution_entropy": 0.6431098183707868, "calibration/batch_entropy_100bins": 0.48089187317226323, "calibration/batch_entropy_10bins": 0.6431098183707868, "calibration/batch_entropy_50bins": 0.5617938193030543, "calibration/batch_uniqueness": 0.7219718974960545, "calibration/confidence_entropy": 0.34696880251966167, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.49592420401806236, "calibration/mean_confidence": 0.7925940600227801, "calibration/prompt_uniqueness": 0.5942279192380695, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0345703125, "completions/max_length": 1536.0, "completions/max_terminated_length": 1495.6, "completions/mean_length": 270.69580078125, "completions/mean_terminated_length": 225.39390869140624, "completions/min_length": 2.0, "completions/min_terminated_length": 2.0, "epoch": 0.016, "grad_norm": 0.06927429139614105, "learning_rate": 3.1249999999999997e-07, "loss": 0.0744, "num_tokens": 17615957.0, "reward": 0.533476448059082, "reward_std": 0.4068940103054047, "rewards/accuracy_reward": 0.219921875, "rewards/brier_reward": 0.3760594606399536, "rewards/confidence_uniqueness_reward": 0.48737336993217467, "rewards/format_reward": 0.68427734375, "rewards/frontier_aurc_reward": 0.30170206129550936, "rewards/frontier_coverage_0": 0.30170206129550936, "rewards/frontier_coverage_1": 0.30170206129550936, "rewards/frontier_coverage_10": 0.30170206129550936, "rewards/frontier_coverage_15": 0.30170206129550936, "rewards/frontier_coverage_20": 0.30170206129550936, "rewards/frontier_coverage_25": 0.30170206129550936, "rewards/frontier_coverage_5": 0.30170206129550936, "rewards/frontier_ece_reward": 0.30170206129550936, "rewards/frontier_entropy_batch_reward": -0.6530686259269715, "signal/accuracy_reward/centered_abs_mean": 0.2394775390625, "signal/accuracy_reward/group_bin_occupancy": 0.21015625, "signal/accuracy_reward/group_std_mean": 0.28177876472473146, "signal/accuracy_reward/group_zero_std_frac": 0.31875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.11973876953125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.11973876953125, "signal/advantage_abs_mean": 0.34517702460289, "signal/advantage_pre_scale_abs_mean": 0.34517702460289, "signal/advantage_pre_scale_std": 0.4175687491893768, "signal/advantage_std": 0.4175687491893768, "signal/brier_reward/centered_abs_mean": 0.31782959699630736, "signal/brier_reward/group_bin_occupancy": 0.747265625, "signal/brier_reward/group_std_mean": 0.3630960941314697, "signal/brier_reward/group_zero_std_frac": 0.003125, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.031782958284020425, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.031782958284020425, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.29565892815589906, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.58359375, "signal/confidence_uniqueness_reward/group_std_mean": 0.3465812742710114, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02956589199602604, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.02956589199602604, "signal/format_reward/centered_abs_mean": 0.399285888671875, "signal/format_reward/group_bin_occupancy": 0.25, "signal/format_reward/group_std_mean": 0.4503865897655487, "signal/format_reward/group_zero_std_frac": 0.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.1996429443359375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.1996429443359375, "signal/frontier_aurc_reward/centered_abs_mean": 0.2909155905246735, "signal/frontier_aurc_reward/group_bin_occupancy": 0.65859375, "signal/frontier_aurc_reward/group_std_mean": 0.34205764532089233, "signal/frontier_aurc_reward/group_zero_std_frac": 0.003125, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0036364448722451927, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0036364448722451927, "signal/frontier_coverage_0/centered_abs_mean": 0.2909155905246735, "signal/frontier_coverage_0/group_bin_occupancy": 0.65859375, "signal/frontier_coverage_0/group_std_mean": 0.34205764532089233, "signal/frontier_coverage_0/group_zero_std_frac": 0.003125, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0036364448722451927, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0036364448722451927, "signal/frontier_coverage_1/centered_abs_mean": 0.2909155905246735, "signal/frontier_coverage_1/group_bin_occupancy": 0.65859375, "signal/frontier_coverage_1/group_std_mean": 0.34205764532089233, "signal/frontier_coverage_1/group_zero_std_frac": 0.003125, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0036364448722451927, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0036364448722451927, "signal/frontier_coverage_10/centered_abs_mean": 0.2909155905246735, "signal/frontier_coverage_10/group_bin_occupancy": 0.65859375, "signal/frontier_coverage_10/group_std_mean": 0.34205764532089233, "signal/frontier_coverage_10/group_zero_std_frac": 0.003125, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0036364448722451927, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0036364448722451927, "signal/frontier_coverage_15/centered_abs_mean": 0.2909155905246735, "signal/frontier_coverage_15/group_bin_occupancy": 0.65859375, "signal/frontier_coverage_15/group_std_mean": 0.34205764532089233, "signal/frontier_coverage_15/group_zero_std_frac": 0.003125, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0036364448722451927, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0036364448722451927, "signal/frontier_coverage_20/centered_abs_mean": 0.2909155905246735, "signal/frontier_coverage_20/group_bin_occupancy": 0.65859375, "signal/frontier_coverage_20/group_std_mean": 0.34205764532089233, "signal/frontier_coverage_20/group_zero_std_frac": 0.003125, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0036364448722451927, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0036364448722451927, "signal/frontier_coverage_25/centered_abs_mean": 0.2909155905246735, "signal/frontier_coverage_25/group_bin_occupancy": 0.65859375, "signal/frontier_coverage_25/group_std_mean": 0.34205764532089233, "signal/frontier_coverage_25/group_zero_std_frac": 0.003125, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0036364448722451927, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0036364448722451927, "signal/frontier_coverage_5/centered_abs_mean": 0.2909155905246735, "signal/frontier_coverage_5/group_bin_occupancy": 0.65859375, "signal/frontier_coverage_5/group_std_mean": 0.34205764532089233, "signal/frontier_coverage_5/group_zero_std_frac": 0.003125, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0036364448722451927, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0036364448722451927, "signal/frontier_ece_reward/centered_abs_mean": 0.2909155905246735, "signal/frontier_ece_reward/group_bin_occupancy": 0.65859375, "signal/frontier_ece_reward/group_std_mean": 0.34205764532089233, "signal/frontier_ece_reward/group_zero_std_frac": 0.003125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.02909155897796154, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.02909155897796154, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.424519544839859, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.314453125, "signal/frontier_entropy_batch_reward/group_std_mean": 0.47118043899536133, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.042451954632997516, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.042451954632997516, "step": 5 }, { "calibration/aurc": 0.6633336947681945, "calibration/batch_distribution_entropy": 0.6530785282030743, "calibration/batch_entropy_100bins": 0.4852722322513416, "calibration/batch_entropy_10bins": 0.6530785282030743, "calibration/batch_entropy_50bins": 0.5661661966634106, "calibration/batch_uniqueness": 0.7272441743970559, "calibration/confidence_entropy": 0.3523645170870256, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.5257630611304681, "calibration/mean_confidence": 0.7933062167842394, "calibration/prompt_uniqueness": 0.6178022073084117, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0375, "completions/max_length": 1536.0, "completions/max_terminated_length": 1488.2, "completions/mean_length": 261.59072265625, "completions/mean_terminated_length": 211.972216796875, "completions/min_length": 2.0, "completions/min_terminated_length": 2.0, "epoch": 0.032, "grad_norm": 0.030047137290239334, "learning_rate": 6.249999999999999e-07, "loss": 0.0764, "num_tokens": 35394998.0, "reward": 0.5451710224151611, "reward_std": 0.38366069793701174, "rewards/accuracy_reward": 0.20810546875, "rewards/brier_reward": 0.3810562252998352, "rewards/confidence_uniqueness_reward": 0.5187723219394684, "rewards/format_reward": 0.7197265625, "rewards/frontier_aurc_reward": 0.3000528335571289, "rewards/frontier_coverage_0": 0.3000528335571289, "rewards/frontier_coverage_1": 0.3000528335571289, "rewards/frontier_coverage_10": 0.3000528335571289, "rewards/frontier_coverage_15": 0.3000528335571289, "rewards/frontier_coverage_20": 0.3000528335571289, "rewards/frontier_coverage_25": 0.3000528335571289, "rewards/frontier_coverage_5": 0.3000528335571289, "rewards/frontier_ece_reward": 0.3000528335571289, "rewards/frontier_entropy_batch_reward": -0.6873842597007751, "signal/accuracy_reward/centered_abs_mean": 0.216424560546875, "signal/accuracy_reward/group_bin_occupancy": 0.20703125, "signal/accuracy_reward/group_std_mean": 0.26217670142650606, "signal/accuracy_reward/group_zero_std_frac": 0.34375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1082122802734375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.1082122802734375, "signal/advantage_abs_mean": 0.3162668466567993, "signal/advantage_pre_scale_abs_mean": 0.3162668466567993, "signal/advantage_pre_scale_std": 0.3942062079906464, "signal/advantage_std": 0.3942062079906464, "signal/brier_reward/centered_abs_mean": 0.3037886917591095, "signal/brier_reward/group_bin_occupancy": 0.775390625, "signal/brier_reward/group_std_mean": 0.3516114354133606, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03037887029349804, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.03037887029349804, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.28001424074172976, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.580859375, "signal/confidence_uniqueness_reward/group_std_mean": 0.3388310194015503, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.028001424670219422, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.028001424670219422, "signal/format_reward/centered_abs_mean": 0.37132568359375, "signal/format_reward/group_bin_occupancy": 0.249609375, "signal/format_reward/group_std_mean": 0.4337587058544159, "signal/format_reward/group_zero_std_frac": 0.003125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.185662841796875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.185662841796875, "signal/frontier_aurc_reward/centered_abs_mean": 0.27608999609947205, "signal/frontier_aurc_reward/group_bin_occupancy": 0.683203125, "signal/frontier_aurc_reward/group_std_mean": 0.33003708720207214, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.003451125044375658, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.003451125044375658, "signal/frontier_coverage_0/centered_abs_mean": 0.27608999609947205, "signal/frontier_coverage_0/group_bin_occupancy": 0.683203125, "signal/frontier_coverage_0/group_std_mean": 0.33003708720207214, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003451125044375658, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003451125044375658, "signal/frontier_coverage_1/centered_abs_mean": 0.27608999609947205, "signal/frontier_coverage_1/group_bin_occupancy": 0.683203125, "signal/frontier_coverage_1/group_std_mean": 0.33003708720207214, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003451125044375658, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003451125044375658, "signal/frontier_coverage_10/centered_abs_mean": 0.27608999609947205, "signal/frontier_coverage_10/group_bin_occupancy": 0.683203125, "signal/frontier_coverage_10/group_std_mean": 0.33003708720207214, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003451125044375658, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003451125044375658, "signal/frontier_coverage_15/centered_abs_mean": 0.27608999609947205, "signal/frontier_coverage_15/group_bin_occupancy": 0.683203125, "signal/frontier_coverage_15/group_std_mean": 0.33003708720207214, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003451125044375658, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003451125044375658, "signal/frontier_coverage_20/centered_abs_mean": 0.27608999609947205, "signal/frontier_coverage_20/group_bin_occupancy": 0.683203125, "signal/frontier_coverage_20/group_std_mean": 0.33003708720207214, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003451125044375658, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003451125044375658, "signal/frontier_coverage_25/centered_abs_mean": 0.27608999609947205, "signal/frontier_coverage_25/group_bin_occupancy": 0.683203125, "signal/frontier_coverage_25/group_std_mean": 0.33003708720207214, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003451125044375658, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003451125044375658, "signal/frontier_coverage_5/centered_abs_mean": 0.27608999609947205, "signal/frontier_coverage_5/group_bin_occupancy": 0.683203125, "signal/frontier_coverage_5/group_std_mean": 0.33003708720207214, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003451125044375658, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003451125044375658, "signal/frontier_ece_reward/centered_abs_mean": 0.27608999609947205, "signal/frontier_ece_reward/group_bin_occupancy": 0.683203125, "signal/frontier_ece_reward/group_std_mean": 0.33003708720207214, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.027609000355005263, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.027609000355005263, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.39845545291900636, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.3171875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4560263633728027, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.003125, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03984554782509804, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03984554782509804, "step": 10 }, { "calibration/aurc": 0.6056235500133583, "calibration/batch_distribution_entropy": 0.6372280867706955, "calibration/batch_entropy_100bins": 0.48009095551927256, "calibration/batch_entropy_10bins": 0.6372280867706955, "calibration/batch_entropy_50bins": 0.5595683840082752, "calibration/batch_uniqueness": 0.7113122520911674, "calibration/buffer_distribution_entropy": 0.6568801862675887, "calibration/buffer_entropy_100bins": 0.49209269792202925, "calibration/buffer_entropy_10bins": 0.6568801862675887, "calibration/buffer_entropy_50bins": 0.5730805301755447, "calibration/confidence_entropy": 0.35123976578789656, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.47231641548769077, "calibration/mean_confidence": 0.804845781710738, "calibration/prompt_uniqueness": 0.6089974924774788, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01650390625, "completions/max_length": 1536.0, "completions/max_terminated_length": 1428.2, "completions/mean_length": 200.514453125, "completions/mean_terminated_length": 178.22185363769532, "completions/min_length": 9.6, "completions/min_terminated_length": 9.6, "epoch": 0.048, "grad_norm": 0.05051470175385475, "learning_rate": 9.374999999999999e-07, "loss": 0.0486, "num_tokens": 52497002.0, "reward": 0.665993869304657, "reward_std": 0.3059393674135208, "rewards/accuracy_reward": 0.27236328125, "rewards/brier_reward": 0.48226693272590637, "rewards/confidence_uniqueness_reward": 0.6447442531585693, "rewards/format_reward": 0.8810546875, "rewards/frontier_aurc_reward": 0.29981047259643673, "rewards/frontier_coverage_0": 0.3134632341563702, "rewards/frontier_coverage_1": 0.3134632341563702, "rewards/frontier_coverage_10": 0.3134632341563702, "rewards/frontier_coverage_15": 0.3134632341563702, "rewards/frontier_coverage_20": 0.3134632341563702, "rewards/frontier_coverage_25": 0.3134632341563702, "rewards/frontier_coverage_5": 0.3134632341563702, "rewards/frontier_ece_reward": 0.2883337765932083, "rewards/frontier_entropy_batch_reward": -0.8342528104782104, "signal/accuracy_reward/centered_abs_mean": 0.202545166015625, "signal/accuracy_reward/group_bin_occupancy": 0.207421875, "signal/accuracy_reward/group_std_mean": 0.2523681789636612, "signal/accuracy_reward/group_zero_std_frac": 0.340625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1012725830078125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.1012725830078125, "signal/advantage_abs_mean": 0.23214915990829468, "signal/advantage_pre_scale_abs_mean": 0.23214915990829468, "signal/advantage_pre_scale_std": 0.31858267784118655, "signal/advantage_std": 0.31858267784118655, "signal/brier_reward/centered_abs_mean": 0.2716783404350281, "signal/brier_reward/group_bin_occupancy": 0.81015625, "signal/brier_reward/group_std_mean": 0.3263775706291199, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02716783434152603, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.02716783434152603, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.1999937564134598, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.597265625, "signal/confidence_uniqueness_reward/group_std_mean": 0.26394935250282286, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.019999375380575658, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.019999375380575658, "signal/format_reward/centered_abs_mean": 0.19697265625, "signal/format_reward/group_bin_occupancy": 0.24140625, "signal/format_reward/group_std_mean": 0.2975906074047089, "signal/format_reward/group_zero_std_frac": 0.06875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.098486328125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.098486328125, "signal/frontier_aurc_reward/centered_abs_mean": 0.215498910844326, "signal/frontier_aurc_reward/group_bin_occupancy": 0.72890625, "signal/frontier_aurc_reward/group_std_mean": 0.2603446511551738, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0026937363953038586, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0026937363953038586, "signal/frontier_coverage_0/centered_abs_mean": 0.23299580514431, "signal/frontier_coverage_0/group_bin_occupancy": 0.70546875, "signal/frontier_coverage_0/group_std_mean": 0.2876336514949799, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0029124475782737135, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0029124475782737135, "signal/frontier_coverage_1/centered_abs_mean": 0.23299580514431, "signal/frontier_coverage_1/group_bin_occupancy": 0.70546875, "signal/frontier_coverage_1/group_std_mean": 0.2876336514949799, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0029124475782737135, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0029124475782737135, "signal/frontier_coverage_10/centered_abs_mean": 0.23299580514431, "signal/frontier_coverage_10/group_bin_occupancy": 0.70546875, "signal/frontier_coverage_10/group_std_mean": 0.2876336514949799, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0029124475782737135, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0029124475782737135, "signal/frontier_coverage_15/centered_abs_mean": 0.23299580514431, "signal/frontier_coverage_15/group_bin_occupancy": 0.70546875, "signal/frontier_coverage_15/group_std_mean": 0.2876336514949799, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0029124475782737135, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0029124475782737135, "signal/frontier_coverage_20/centered_abs_mean": 0.23299580514431, "signal/frontier_coverage_20/group_bin_occupancy": 0.70546875, "signal/frontier_coverage_20/group_std_mean": 0.2876336514949799, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0029124475782737135, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0029124475782737135, "signal/frontier_coverage_25/centered_abs_mean": 0.23299580514431, "signal/frontier_coverage_25/group_bin_occupancy": 0.70546875, "signal/frontier_coverage_25/group_std_mean": 0.2876336514949799, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0029124475782737135, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0029124475782737135, "signal/frontier_coverage_5/centered_abs_mean": 0.23299580514431, "signal/frontier_coverage_5/group_bin_occupancy": 0.70546875, "signal/frontier_coverage_5/group_std_mean": 0.2876336514949799, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0029124475782737135, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0029124475782737135, "signal/frontier_ece_reward/centered_abs_mean": 0.24313633441925048, "signal/frontier_ece_reward/group_bin_occupancy": 0.712890625, "signal/frontier_ece_reward/group_std_mean": 0.29327360093593596, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.024313633516430854, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.024313633516430854, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26260979771614074, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.334375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3697131097316742, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.015625, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02626098096370697, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02626098096370697, "step": 15 }, { "calibration/aurc": 0.5229047865146916, "calibration/batch_distribution_entropy": 0.6965577173291834, "calibration/batch_entropy_100bins": 0.5146396132435052, "calibration/batch_entropy_10bins": 0.6965577173291834, "calibration/batch_entropy_50bins": 0.6023062687429694, "calibration/batch_uniqueness": 0.7574193666928948, "calibration/buffer_distribution_entropy": 0.657240172374993, "calibration/buffer_entropy_100bins": 0.49448125756617145, "calibration/buffer_entropy_10bins": 0.657240172374993, "calibration/buffer_entropy_50bins": 0.5759482684510908, "calibration/confidence_entropy": 0.37992020571579327, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.3746292271322627, "calibration/mean_confidence": 0.7828071817671975, "calibration/prompt_uniqueness": 0.6806407808231312, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00361328125, "completions/max_length": 1536.0, "completions/max_terminated_length": 1317.4, "completions/mean_length": 137.51865234375, "completions/mean_terminated_length": 132.4542221069336, "completions/min_length": 24.4, "completions/min_terminated_length": 24.4, "epoch": 0.064, "grad_norm": 0.008556111715734005, "learning_rate": 1e-06, "loss": 0.0126, "num_tokens": 68823593.0, "reward": 0.7013731718063354, "reward_std": 0.2047277569770813, "rewards/accuracy_reward": 0.3408203125, "rewards/brier_reward": 0.5739028096199036, "rewards/confidence_uniqueness_reward": 0.7480879545211792, "rewards/format_reward": 0.97705078125, "rewards/frontier_aurc_reward": -0.006883773859590292, "rewards/frontier_coverage_0": 0.06146884858608246, "rewards/frontier_coverage_1": 0.06146884858608246, "rewards/frontier_coverage_10": 0.06146884858608246, "rewards/frontier_coverage_15": 0.06146884858608246, "rewards/frontier_coverage_20": 0.06146884858608246, "rewards/frontier_coverage_25": 0.06146884858608246, "rewards/frontier_coverage_5": 0.06146884858608246, "rewards/frontier_ece_reward": -0.050785575062036514, "rewards/frontier_entropy_batch_reward": -0.8997539043426513, "signal/accuracy_reward/centered_abs_mean": 0.211279296875, "signal/accuracy_reward/group_bin_occupancy": 0.20625, "signal/accuracy_reward/group_std_mean": 0.25800455510616305, "signal/accuracy_reward/group_zero_std_frac": 0.35, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1056396484375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.1056396484375, "signal/advantage_abs_mean": 0.1570647269487381, "signal/advantage_pre_scale_abs_mean": 0.1570647269487381, "signal/advantage_pre_scale_std": 0.22092486619949342, "signal/advantage_std": 0.22092486619949342, "signal/brier_reward/centered_abs_mean": 0.24524094462394713, "signal/brier_reward/group_bin_occupancy": 0.844140625, "signal/brier_reward/group_std_mean": 0.3004362642765045, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02452409528195858, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.02452409528195858, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.1221130445599556, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.6515625, "signal/confidence_uniqueness_reward/group_std_mean": 0.1624012291431427, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.012211304530501366, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.012211304530501366, "signal/format_reward/centered_abs_mean": 0.043353271484375, "signal/format_reward/group_bin_occupancy": 0.190234375, "signal/format_reward/group_std_mean": 0.10569706857204438, "signal/format_reward/group_zero_std_frac": 0.478125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0216766357421875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0216766357421875, "signal/frontier_aurc_reward/centered_abs_mean": 0.005248846765607595, "signal/frontier_aurc_reward/group_bin_occupancy": 0.734765625, "signal/frontier_aurc_reward/group_std_mean": 0.007484708447009325, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.56105883535929e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.56105883535929e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.10489135384559631, "signal/frontier_coverage_0/group_bin_occupancy": 0.695703125, "signal/frontier_coverage_0/group_std_mean": 0.1639949709177017, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0013111419510096311, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0013111419510096311, "signal/frontier_coverage_1/centered_abs_mean": 0.10489135384559631, "signal/frontier_coverage_1/group_bin_occupancy": 0.695703125, "signal/frontier_coverage_1/group_std_mean": 0.1639949709177017, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0013111419510096311, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0013111419510096311, "signal/frontier_coverage_10/centered_abs_mean": 0.10489135384559631, "signal/frontier_coverage_10/group_bin_occupancy": 0.695703125, "signal/frontier_coverage_10/group_std_mean": 0.1639949709177017, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0013111419510096311, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0013111419510096311, "signal/frontier_coverage_15/centered_abs_mean": 0.10489135384559631, "signal/frontier_coverage_15/group_bin_occupancy": 0.695703125, "signal/frontier_coverage_15/group_std_mean": 0.1639949709177017, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013111419510096311, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013111419510096311, "signal/frontier_coverage_20/centered_abs_mean": 0.10489135384559631, "signal/frontier_coverage_20/group_bin_occupancy": 0.695703125, "signal/frontier_coverage_20/group_std_mean": 0.1639949709177017, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013111419510096311, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013111419510096311, "signal/frontier_coverage_25/centered_abs_mean": 0.10489135384559631, "signal/frontier_coverage_25/group_bin_occupancy": 0.695703125, "signal/frontier_coverage_25/group_std_mean": 0.1639949709177017, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013111419510096311, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013111419510096311, "signal/frontier_coverage_5/centered_abs_mean": 0.10489135384559631, "signal/frontier_coverage_5/group_bin_occupancy": 0.695703125, "signal/frontier_coverage_5/group_std_mean": 0.1639949709177017, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0013111419510096311, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0013111419510096311, "signal/frontier_ece_reward/centered_abs_mean": 0.14598130881786348, "signal/frontier_ece_reward/group_bin_occupancy": 0.7453125, "signal/frontier_ece_reward/group_std_mean": 0.17359468340873718, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.014598131738603115, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.014598131738603115, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.17333437800407409, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.3671875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.29973788261413575, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.06875, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.017333437874913215, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.017333437874913215, "step": 20 }, { "calibration/aurc": 0.6283088834520059, "calibration/batch_distribution_entropy": 0.8095181952478757, "calibration/batch_entropy_100bins": 0.6000067690369304, "calibration/batch_entropy_10bins": 0.8095181952478757, "calibration/batch_entropy_50bins": 0.6862870972074454, "calibration/batch_uniqueness": 0.831063874562125, "calibration/buffer_distribution_entropy": 0.6868147973448078, "calibration/buffer_entropy_100bins": 0.515115289448944, "calibration/buffer_entropy_10bins": 0.6868147973448078, "calibration/buffer_entropy_50bins": 0.5986524147896154, "calibration/confidence_entropy": 0.45480330324323653, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.3972159911131528, "calibration/mean_confidence": 0.7111007678831154, "calibration/prompt_uniqueness": 0.7651551820929442, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0013671875, "completions/max_length": 1536.0, "completions/max_terminated_length": 733.0, "completions/mean_length": 118.575, "completions/mean_terminated_length": 116.63536529541015, "completions/min_length": 35.8, "completions/min_terminated_length": 35.8, "epoch": 0.08, "grad_norm": 0.013943054713308811, "learning_rate": 1e-06, "loss": 0.0027, "num_tokens": 84970953.0, "reward": 0.735591733455658, "reward_std": 0.1745338499546051, "rewards/accuracy_reward": 0.35390625, "rewards/brier_reward": 0.6262224793434144, "rewards/confidence_uniqueness_reward": 0.8280656814575196, "rewards/format_reward": 0.9927734375, "rewards/frontier_aurc_reward": -0.005787147860974074, "rewards/frontier_coverage_0": 0.06969771385192872, "rewards/frontier_coverage_1": 0.06969771385192872, "rewards/frontier_coverage_10": 0.06969771385192872, "rewards/frontier_coverage_15": 0.06969771385192872, "rewards/frontier_coverage_20": 0.06969771385192872, "rewards/frontier_coverage_25": 0.06969771385192872, "rewards/frontier_coverage_5": 0.06969771385192872, "rewards/frontier_ece_reward": -0.04099251367151737, "rewards/frontier_entropy_batch_reward": -0.8510387659072876, "signal/accuracy_reward/centered_abs_mean": 0.19227294921875, "signal/accuracy_reward/group_bin_occupancy": 0.204296875, "signal/accuracy_reward/group_std_mean": 0.24025425910949708, "signal/accuracy_reward/group_zero_std_frac": 0.365625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.096136474609375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.096136474609375, "signal/advantage_abs_mean": 0.13553658425807952, "signal/advantage_pre_scale_abs_mean": 0.13553658425807952, "signal/advantage_pre_scale_std": 0.19173393845558168, "signal/advantage_std": 0.19173393845558168, "signal/brier_reward/centered_abs_mean": 0.2213940680027008, "signal/brier_reward/group_bin_occupancy": 0.88359375, "signal/brier_reward/group_std_mean": 0.2728204667568207, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.022139406949281692, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.022139406949281692, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.07314713597297669, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.72421875, "signal/confidence_uniqueness_reward/group_std_mean": 0.10160589665174484, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0073147137649357315, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0073147137649357315, "signal/format_reward/centered_abs_mean": 0.01385498046875, "signal/format_reward/group_bin_occupancy": 0.14921875, "signal/format_reward/group_std_mean": 0.0368439082056284, "signal/format_reward/group_zero_std_frac": 0.80625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.006927490234375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.006927490234375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0035542991012334824, "signal/frontier_aurc_reward/group_bin_occupancy": 0.74453125, "signal/frontier_aurc_reward/group_std_mean": 0.005189351085573435, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.442873832886107e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.442873832886107e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.13423685133457183, "signal/frontier_coverage_0/group_bin_occupancy": 0.77578125, "signal/frontier_coverage_0/group_std_mean": 0.1972368836402893, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0016779606696218253, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0016779606696218253, "signal/frontier_coverage_1/centered_abs_mean": 0.13423685133457183, "signal/frontier_coverage_1/group_bin_occupancy": 0.77578125, "signal/frontier_coverage_1/group_std_mean": 0.1972368836402893, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0016779606696218253, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0016779606696218253, "signal/frontier_coverage_10/centered_abs_mean": 0.13423685133457183, "signal/frontier_coverage_10/group_bin_occupancy": 0.77578125, "signal/frontier_coverage_10/group_std_mean": 0.1972368836402893, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0016779606696218253, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0016779606696218253, "signal/frontier_coverage_15/centered_abs_mean": 0.13423685133457183, "signal/frontier_coverage_15/group_bin_occupancy": 0.77578125, "signal/frontier_coverage_15/group_std_mean": 0.1972368836402893, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016779606696218253, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016779606696218253, "signal/frontier_coverage_20/centered_abs_mean": 0.13423685133457183, "signal/frontier_coverage_20/group_bin_occupancy": 0.77578125, "signal/frontier_coverage_20/group_std_mean": 0.1972368836402893, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0016779606696218253, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016779606696218253, "signal/frontier_coverage_25/centered_abs_mean": 0.13423685133457183, "signal/frontier_coverage_25/group_bin_occupancy": 0.77578125, "signal/frontier_coverage_25/group_std_mean": 0.1972368836402893, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0016779606696218253, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016779606696218253, "signal/frontier_coverage_5/centered_abs_mean": 0.13423685133457183, "signal/frontier_coverage_5/group_bin_occupancy": 0.77578125, "signal/frontier_coverage_5/group_std_mean": 0.1972368836402893, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0016779606696218253, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0016779606696218253, "signal/frontier_ece_reward/centered_abs_mean": 0.13046298176050186, "signal/frontier_ece_reward/group_bin_occupancy": 0.812890625, "signal/frontier_ece_reward/group_std_mean": 0.1628311574459076, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.013046298176050186, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.013046298176050186, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.24598013758659362, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.45390625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.38413644433021543, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.01875, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02459801435470581, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02459801435470581, "step": 25 }, { "calibration/aurc": 0.648785096123655, "calibration/batch_distribution_entropy": 0.9380209005564305, "calibration/batch_entropy_100bins": 0.7781107329568903, "calibration/batch_entropy_10bins": 0.9380209005564305, "calibration/batch_entropy_50bins": 0.850847448214451, "calibration/batch_uniqueness": 0.9082612624262516, "calibration/buffer_distribution_entropy": 0.7433120040362949, "calibration/buffer_entropy_100bins": 0.564207235491536, "calibration/buffer_entropy_10bins": 0.7433120040362949, "calibration/buffer_entropy_50bins": 0.6484998987649865, "calibration/confidence_entropy": 0.5158145753859638, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.29242145964555044, "calibration/mean_confidence": 0.5788182740863752, "calibration/prompt_uniqueness": 0.8489781667317032, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0017578125, "completions/max_length": 1433.0, "completions/max_terminated_length": 763.8, "completions/mean_length": 115.12734375, "completions/mean_terminated_length": 112.62236328125, "completions/min_length": 37.8, "completions/min_terminated_length": 37.8, "epoch": 0.096, "grad_norm": 0.0034602871164679527, "learning_rate": 1e-06, "loss": 0.0056, "num_tokens": 101194465.0, "reward": 0.7778663992881775, "reward_std": 0.16724584996700287, "rewards/accuracy_reward": 0.35966796875, "rewards/brier_reward": 0.6825961947441102, "rewards/confidence_uniqueness_reward": 0.906465494632721, "rewards/format_reward": 0.99453125, "rewards/frontier_aurc_reward": -0.0052437069825828075, "rewards/frontier_coverage_0": 0.10269922763109207, "rewards/frontier_coverage_1": 0.10269922763109207, "rewards/frontier_coverage_10": 0.10269922763109207, "rewards/frontier_coverage_15": 0.10269922763109207, "rewards/frontier_coverage_20": 0.10269922763109207, "rewards/frontier_coverage_25": 0.10269922763109207, "rewards/frontier_coverage_5": 0.10269922763109207, "rewards/frontier_ece_reward": -0.03292221836745739, "rewards/frontier_entropy_batch_reward": -0.6376779556274415, "signal/accuracy_reward/centered_abs_mean": 0.185467529296875, "signal/accuracy_reward/group_bin_occupancy": 0.20234375, "signal/accuracy_reward/group_std_mean": 0.23311618864536285, "signal/accuracy_reward/group_zero_std_frac": 0.38125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0927337646484375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0927337646484375, "signal/advantage_abs_mean": 0.13114093095064164, "signal/advantage_pre_scale_abs_mean": 0.13114093095064164, "signal/advantage_pre_scale_std": 0.1809857577085495, "signal/advantage_std": 0.1809857577085495, "signal/brier_reward/centered_abs_mean": 0.22195914387702942, "signal/brier_reward/group_bin_occupancy": 0.914453125, "signal/brier_reward/group_std_mean": 0.2706751048564911, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.022195914760231972, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.022195914760231972, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.05482520312070847, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.744921875, "signal/confidence_uniqueness_reward/group_std_mean": 0.0789007768034935, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005482520535588264, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005482520535588264, "signal/format_reward/centered_abs_mean": 0.01048583984375, "signal/format_reward/group_bin_occupancy": 0.144140625, "signal/format_reward/group_std_mean": 0.02846333533525467, "signal/format_reward/group_zero_std_frac": 0.846875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.005242919921875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.005242919921875, "signal/frontier_aurc_reward/centered_abs_mean": 0.0026835352182388306, "signal/frontier_aurc_reward/group_bin_occupancy": 0.751953125, "signal/frontier_aurc_reward/group_std_mean": 0.004022491350769997, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.3544190227985385e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.3544190227985385e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.20481350123882294, "signal/frontier_coverage_0/group_bin_occupancy": 0.88515625, "signal/frontier_coverage_0/group_std_mean": 0.2719453454017639, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0025601688772439956, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0025601688772439956, "signal/frontier_coverage_1/centered_abs_mean": 0.20481350123882294, "signal/frontier_coverage_1/group_bin_occupancy": 0.88515625, "signal/frontier_coverage_1/group_std_mean": 0.2719453454017639, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0025601688772439956, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025601688772439956, "signal/frontier_coverage_10/centered_abs_mean": 0.20481350123882294, "signal/frontier_coverage_10/group_bin_occupancy": 0.88515625, "signal/frontier_coverage_10/group_std_mean": 0.2719453454017639, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0025601688772439956, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025601688772439956, "signal/frontier_coverage_15/centered_abs_mean": 0.20481350123882294, "signal/frontier_coverage_15/group_bin_occupancy": 0.88515625, "signal/frontier_coverage_15/group_std_mean": 0.2719453454017639, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0025601688772439956, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0025601688772439956, "signal/frontier_coverage_20/centered_abs_mean": 0.20481350123882294, "signal/frontier_coverage_20/group_bin_occupancy": 0.88515625, "signal/frontier_coverage_20/group_std_mean": 0.2719453454017639, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0025601688772439956, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0025601688772439956, "signal/frontier_coverage_25/centered_abs_mean": 0.20481350123882294, "signal/frontier_coverage_25/group_bin_occupancy": 0.88515625, "signal/frontier_coverage_25/group_std_mean": 0.2719453454017639, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0025601688772439956, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0025601688772439956, "signal/frontier_coverage_5/centered_abs_mean": 0.20481350123882294, "signal/frontier_coverage_5/group_bin_occupancy": 0.88515625, "signal/frontier_coverage_5/group_std_mean": 0.2719453454017639, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025601688772439956, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025601688772439956, "signal/frontier_ece_reward/centered_abs_mean": 0.12130876779556274, "signal/frontier_ece_reward/group_bin_occupancy": 0.83046875, "signal/frontier_ece_reward/group_std_mean": 0.1663817882537842, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.012130877003073692, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.012130877003073692, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.44812787771224977, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.614453125, "signal/frontier_entropy_batch_reward/group_std_mean": 0.5313847541809082, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.00625, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0448127880692482, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0448127880692482, "step": 30 }, { "calibration/aurc": 0.5053299592442537, "calibration/batch_distribution_entropy": 0.9488818143342655, "calibration/batch_entropy_100bins": 0.9306714343967982, "calibration/batch_entropy_10bins": 0.9488818143342655, "calibration/batch_entropy_50bins": 0.9460526008785772, "calibration/batch_uniqueness": 0.9490444932260778, "calibration/buffer_distribution_entropy": 0.8224099484197692, "calibration/buffer_entropy_100bins": 0.6592172461309497, "calibration/buffer_entropy_10bins": 0.8224099484197692, "calibration/buffer_entropy_50bins": 0.7358472349793205, "calibration/confidence_entropy": 0.5183275438944214, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0027450980392156863, "calibration/coverage@20%": 0.0027450980392156863, "calibration/coverage@25%": 0.008627450980392156, "calibration/coverage@30%": 0.020375273397030044, "calibration/coverage@5%": 0.0, "calibration/ece": 0.1796229260918823, "calibration/mean_confidence": 0.40559769162291126, "calibration/prompt_uniqueness": 0.8886733536752803, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0009765625, "completions/max_length": 1536.0, "completions/max_terminated_length": 576.4, "completions/mean_length": 106.2763671875, "completions/mean_terminated_length": 104.87900238037109, "completions/min_length": 36.0, "completions/min_terminated_length": 36.0, "epoch": 0.112, "grad_norm": 0.0038092397153377533, "learning_rate": 1e-06, "loss": 0.0021, "num_tokens": 117392207.0, "reward": 0.8370797395706177, "reward_std": 0.13981394171714784, "rewards/accuracy_reward": 0.39765625, "rewards/brier_reward": 0.7146290063858032, "rewards/confidence_uniqueness_reward": 0.9453599929809571, "rewards/format_reward": 0.99765625, "rewards/frontier_aurc_reward": -0.004691840149462223, "rewards/frontier_coverage_0": 0.11487203687429429, "rewards/frontier_coverage_1": 0.11487203687429429, "rewards/frontier_coverage_10": 0.11487203687429429, "rewards/frontier_coverage_15": 0.11487203687429429, "rewards/frontier_coverage_20": 0.11487203687429429, "rewards/frontier_coverage_25": 0.11487203687429429, "rewards/frontier_coverage_5": 0.11487203687429429, "rewards/frontier_ece_reward": -0.006113046361133456, "rewards/frontier_entropy_batch_reward": -0.35956743359565735, "signal/accuracy_reward/centered_abs_mean": 0.1920654296875, "signal/accuracy_reward/group_bin_occupancy": 0.20625, "signal/accuracy_reward/group_std_mean": 0.24405551552772523, "signal/accuracy_reward/group_zero_std_frac": 0.35, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09603271484375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09603271484375, "signal/advantage_abs_mean": 0.10965722203254699, "signal/advantage_pre_scale_abs_mean": 0.10965722203254699, "signal/advantage_pre_scale_std": 0.15251348316669464, "signal/advantage_std": 0.15251348316669464, "signal/brier_reward/centered_abs_mean": 0.20627183914184571, "signal/brier_reward/group_bin_occupancy": 0.90703125, "signal/brier_reward/group_std_mean": 0.25678886771202086, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020627183839678764, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.020627183839678764, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.026137924194335936, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8671875, "signal/confidence_uniqueness_reward/group_std_mean": 0.03944253027439117, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002613792475312948, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002613792475312948, "signal/format_reward/centered_abs_mean": 0.00452880859375, "signal/format_reward/group_bin_occupancy": 0.133984375, "signal/format_reward/group_std_mean": 0.012921943515539169, "signal/format_reward/group_zero_std_frac": 0.928125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.002264404296875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.002264404296875, "signal/frontier_aurc_reward/centered_abs_mean": 0.0016326952259987592, "signal/frontier_aurc_reward/group_bin_occupancy": 0.719921875, "signal/frontier_aurc_reward/group_std_mean": 0.0026589396875351667, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.0408690397744066e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.0408690397744066e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.2962383508682251, "signal/frontier_coverage_0/group_bin_occupancy": 0.94453125, "signal/frontier_coverage_0/group_std_mean": 0.3698026418685913, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003702979441732168, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003702979441732168, "signal/frontier_coverage_1/centered_abs_mean": 0.2962383508682251, "signal/frontier_coverage_1/group_bin_occupancy": 0.94453125, "signal/frontier_coverage_1/group_std_mean": 0.3698026418685913, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003702979441732168, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003702979441732168, "signal/frontier_coverage_10/centered_abs_mean": 0.2962383508682251, "signal/frontier_coverage_10/group_bin_occupancy": 0.94453125, "signal/frontier_coverage_10/group_std_mean": 0.3698026418685913, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003702979441732168, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003702979441732168, "signal/frontier_coverage_15/centered_abs_mean": 0.2962383508682251, "signal/frontier_coverage_15/group_bin_occupancy": 0.94453125, "signal/frontier_coverage_15/group_std_mean": 0.3698026418685913, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003702979441732168, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003702979441732168, "signal/frontier_coverage_20/centered_abs_mean": 0.2962383508682251, "signal/frontier_coverage_20/group_bin_occupancy": 0.94453125, "signal/frontier_coverage_20/group_std_mean": 0.3698026418685913, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003702979441732168, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003702979441732168, "signal/frontier_coverage_25/centered_abs_mean": 0.2962383508682251, "signal/frontier_coverage_25/group_bin_occupancy": 0.94453125, "signal/frontier_coverage_25/group_std_mean": 0.3698026418685913, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003702979441732168, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003702979441732168, "signal/frontier_coverage_5/centered_abs_mean": 0.2962383508682251, "signal/frontier_coverage_5/group_bin_occupancy": 0.94453125, "signal/frontier_coverage_5/group_std_mean": 0.3698026418685913, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003702979441732168, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003702979441732168, "signal/frontier_ece_reward/centered_abs_mean": 0.06853184774518013, "signal/frontier_ece_reward/group_bin_occupancy": 0.810546875, "signal/frontier_ece_reward/group_std_mean": 0.10600927323102952, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006853185035288334, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006853185035288334, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.421990305185318, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.76328125, "signal/frontier_entropy_batch_reward/group_std_mean": 0.48495404720306395, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.04219903200864792, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.04219903200864792, "step": 35 }, { "calibration/aurc": 0.5568013985695004, "calibration/batch_distribution_entropy": 0.9200766038929988, "calibration/batch_entropy_100bins": 0.9329721377257479, "calibration/batch_entropy_10bins": 0.9200766038929988, "calibration/batch_entropy_50bins": 0.9367769710746388, "calibration/batch_uniqueness": 0.942095789057279, "calibration/buffer_distribution_entropy": 0.8899342638622837, "calibration/buffer_entropy_100bins": 0.7478820117698411, "calibration/buffer_entropy_10bins": 0.8899342638622837, "calibration/buffer_entropy_50bins": 0.8124210230759926, "calibration/confidence_entropy": 0.4988997779411394, "calibration/coverage@0%": 0.001175703157975519, "calibration/coverage@1%": 0.001175703157975519, "calibration/coverage@10%": 0.004705114922681402, "calibration/coverage@15%": 0.004705114922681402, "calibration/coverage@20%": 0.0074502129618970875, "calibration/coverage@25%": 0.008626683550132382, "calibration/coverage@30%": 0.018822761981504933, "calibration/coverage@5%": 0.001175703157975519, "calibration/ece": 0.18915972842799716, "calibration/mean_confidence": 0.34738454918178696, "calibration/prompt_uniqueness": 0.8832753255885797, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00126953125, "completions/max_length": 1536.0, "completions/max_terminated_length": 759.6, "completions/mean_length": 107.24765625, "completions/mean_terminated_length": 105.431201171875, "completions/min_length": 41.0, "completions/min_terminated_length": 41.0, "epoch": 0.128, "grad_norm": 0.0016961501678451896, "learning_rate": 1e-06, "loss": 0.0051, "num_tokens": 133407095.0, "reward": 0.8337414741516114, "reward_std": 0.12419430166482925, "rewards/accuracy_reward": 0.39267578125, "rewards/brier_reward": 0.7212756514549256, "rewards/confidence_uniqueness_reward": 0.9402257800102234, "rewards/format_reward": 0.99736328125, "rewards/frontier_aurc_reward": -0.004438658151775599, "rewards/frontier_coverage_0": 0.12889230251312256, "rewards/frontier_coverage_1": 0.12889230251312256, "rewards/frontier_coverage_10": 0.12889230251312256, "rewards/frontier_coverage_15": 0.12889230251312256, "rewards/frontier_coverage_20": 0.12889230251312256, "rewards/frontier_coverage_25": 0.12889230251312256, "rewards/frontier_coverage_5": 0.12889230251312256, "rewards/frontier_ece_reward": 0.0015361378580564633, "rewards/frontier_entropy_batch_reward": -0.3880440592765808, "signal/accuracy_reward/centered_abs_mean": 0.167462158203125, "signal/accuracy_reward/group_bin_occupancy": 0.197265625, "signal/accuracy_reward/group_std_mean": 0.21296925246715545, "signal/accuracy_reward/group_zero_std_frac": 0.421875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0837310791015625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0837310791015625, "signal/advantage_abs_mean": 0.09632081687450408, "signal/advantage_pre_scale_abs_mean": 0.09632081687450408, "signal/advantage_pre_scale_std": 0.13876967430114745, "signal/advantage_std": 0.13876967430114745, "signal/brier_reward/centered_abs_mean": 0.19647954106330873, "signal/brier_reward/group_bin_occupancy": 0.883984375, "signal/brier_reward/group_std_mean": 0.248234623670578, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019647954031825066, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.019647954031825066, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.024874152988195418, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.905859375, "signal/confidence_uniqueness_reward/group_std_mean": 0.03841259628534317, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0024874153779819606, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0024874153779819606, "signal/format_reward/centered_abs_mean": 0.005108642578125, "signal/format_reward/group_bin_occupancy": 0.135546875, "signal/format_reward/group_std_mean": 0.014915533270686865, "signal/format_reward/group_zero_std_frac": 0.915625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0025543212890625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0025543212890625, "signal/frontier_aurc_reward/centered_abs_mean": 0.0013979610754176973, "signal/frontier_aurc_reward/group_bin_occupancy": 0.748046875, "signal/frontier_aurc_reward/group_std_mean": 0.0022263232618570327, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.7474513515480795e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.7474513515480795e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.3031778931617737, "signal/frontier_coverage_0/group_bin_occupancy": 0.9390625, "signal/frontier_coverage_0/group_std_mean": 0.3761015355587006, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0037897238973528145, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0037897238973528145, "signal/frontier_coverage_1/centered_abs_mean": 0.3031778931617737, "signal/frontier_coverage_1/group_bin_occupancy": 0.9390625, "signal/frontier_coverage_1/group_std_mean": 0.3761015355587006, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0037897238973528145, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0037897238973528145, "signal/frontier_coverage_10/centered_abs_mean": 0.3031778931617737, "signal/frontier_coverage_10/group_bin_occupancy": 0.9390625, "signal/frontier_coverage_10/group_std_mean": 0.3761015355587006, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0037897238973528145, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0037897238973528145, "signal/frontier_coverage_15/centered_abs_mean": 0.3031778931617737, "signal/frontier_coverage_15/group_bin_occupancy": 0.9390625, "signal/frontier_coverage_15/group_std_mean": 0.3761015355587006, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0037897238973528145, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0037897238973528145, "signal/frontier_coverage_20/centered_abs_mean": 0.3031778931617737, "signal/frontier_coverage_20/group_bin_occupancy": 0.9390625, "signal/frontier_coverage_20/group_std_mean": 0.3761015355587006, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0037897238973528145, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0037897238973528145, "signal/frontier_coverage_25/centered_abs_mean": 0.3031778931617737, "signal/frontier_coverage_25/group_bin_occupancy": 0.9390625, "signal/frontier_coverage_25/group_std_mean": 0.3761015355587006, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0037897238973528145, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0037897238973528145, "signal/frontier_coverage_5/centered_abs_mean": 0.3031778931617737, "signal/frontier_coverage_5/group_bin_occupancy": 0.9390625, "signal/frontier_coverage_5/group_std_mean": 0.3761015355587006, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0037897238973528145, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0037897238973528145, "signal/frontier_ece_reward/centered_abs_mean": 0.05225505083799362, "signal/frontier_ece_reward/group_bin_occupancy": 0.816796875, "signal/frontier_ece_reward/group_std_mean": 0.0818573072552681, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00522550530731678, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00522550530731678, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.420942884683609, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.778515625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.483720475435257, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.042094288021326066, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.042094288021326066, "step": 40 }, { "calibration/aurc": 0.4175940179177261, "calibration/batch_distribution_entropy": 0.979250693438256, "calibration/batch_entropy_100bins": 0.9653378707811878, "calibration/batch_entropy_10bins": 0.979250693438256, "calibration/batch_entropy_50bins": 0.9767701739682157, "calibration/batch_uniqueness": 0.9545605405154486, "calibration/buffer_distribution_entropy": 0.9248228875256566, "calibration/buffer_entropy_100bins": 0.8047559482027328, "calibration/buffer_entropy_10bins": 0.9248228875256566, "calibration/buffer_entropy_50bins": 0.8588074955955491, "calibration/confidence_entropy": 0.5313858867098616, "calibration/coverage@0%": 0.0011734038649706458, "calibration/coverage@1%": 0.0011734038649706458, "calibration/coverage@10%": 0.012892153864970645, "calibration/coverage@15%": 0.014454653864970645, "calibration/coverage@20%": 0.07773590386497065, "calibration/coverage@25%": 0.20117340386497062, "calibration/coverage@30%": 0.20430451932485322, "calibration/coverage@5%": 0.0011734038649706458, "calibration/ece": 0.24015715380201258, "calibration/mean_confidence": 0.4763771020182851, "calibration/prompt_uniqueness": 0.894472107505203, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000390625, "completions/max_length": 1201.6, "completions/max_terminated_length": 711.0, "completions/mean_length": 107.3626953125, "completions/mean_terminated_length": 106.80433654785156, "completions/min_length": 41.8, "completions/min_terminated_length": 41.8, "epoch": 0.144, "grad_norm": 0.002021110150963068, "learning_rate": 1e-06, "loss": 0.0015, "num_tokens": 149456921.0, "reward": 0.8988601326942444, "reward_std": 0.13267979323863982, "rewards/accuracy_reward": 0.50439453125, "rewards/brier_reward": 0.7083608627319335, "rewards/confidence_uniqueness_reward": 0.9530706882476807, "rewards/format_reward": 0.9990234375, "rewards/frontier_aurc_reward": -0.003968859650194645, "rewards/frontier_coverage_0": 0.02937074126675725, "rewards/frontier_coverage_1": 0.02937074126675725, "rewards/frontier_coverage_10": 0.02937074126675725, "rewards/frontier_coverage_15": 0.02937074126675725, "rewards/frontier_coverage_20": 0.02937074126675725, "rewards/frontier_coverage_25": 0.02937074126675725, "rewards/frontier_coverage_5": 0.02937074126675725, "rewards/frontier_ece_reward": 0.006849961820989847, "rewards/frontier_entropy_batch_reward": -0.22197339236736296, "signal/accuracy_reward/centered_abs_mean": 0.164532470703125, "signal/accuracy_reward/group_bin_occupancy": 0.202734375, "signal/accuracy_reward/group_std_mean": 0.21746462881565093, "signal/accuracy_reward/group_zero_std_frac": 0.378125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0822662353515625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0822662353515625, "signal/advantage_abs_mean": 0.1036305546760559, "signal/advantage_pre_scale_abs_mean": 0.1036305546760559, "signal/advantage_pre_scale_std": 0.14524299502372742, "signal/advantage_std": 0.14524299502372742, "signal/brier_reward/centered_abs_mean": 0.20842026472091674, "signal/brier_reward/group_bin_occupancy": 0.92578125, "signal/brier_reward/group_std_mean": 0.2574777901172638, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02084202691912651, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.02084202691912651, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.015083288960158824, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.941015625, "signal/confidence_uniqueness_reward/group_std_mean": 0.02174595184624195, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001508328877389431, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001508328877389431, "signal/format_reward/centered_abs_mean": 0.00189208984375, "signal/format_reward/group_bin_occupancy": 0.12890625, "signal/format_reward/group_std_mean": 0.005524271540343762, "signal/format_reward/group_zero_std_frac": 0.96875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000946044921875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000946044921875, "signal/frontier_aurc_reward/centered_abs_mean": 0.0020871605491265656, "signal/frontier_aurc_reward/group_bin_occupancy": 0.786328125, "signal/frontier_aurc_reward/group_std_mean": 0.003077511163428426, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.6089507082360795e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.6089507082360795e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.261399644613266, "signal/frontier_coverage_0/group_bin_occupancy": 0.93359375, "signal/frontier_coverage_0/group_std_mean": 0.3283530294895172, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0032674957066774367, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0032674957066774367, "signal/frontier_coverage_1/centered_abs_mean": 0.261399644613266, "signal/frontier_coverage_1/group_bin_occupancy": 0.93359375, "signal/frontier_coverage_1/group_std_mean": 0.3283530294895172, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0032674957066774367, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0032674957066774367, "signal/frontier_coverage_10/centered_abs_mean": 0.261399644613266, "signal/frontier_coverage_10/group_bin_occupancy": 0.93359375, "signal/frontier_coverage_10/group_std_mean": 0.3283530294895172, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0032674957066774367, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0032674957066774367, "signal/frontier_coverage_15/centered_abs_mean": 0.261399644613266, "signal/frontier_coverage_15/group_bin_occupancy": 0.93359375, "signal/frontier_coverage_15/group_std_mean": 0.3283530294895172, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0032674957066774367, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0032674957066774367, "signal/frontier_coverage_20/centered_abs_mean": 0.261399644613266, "signal/frontier_coverage_20/group_bin_occupancy": 0.93359375, "signal/frontier_coverage_20/group_std_mean": 0.3283530294895172, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0032674957066774367, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0032674957066774367, "signal/frontier_coverage_25/centered_abs_mean": 0.261399644613266, "signal/frontier_coverage_25/group_bin_occupancy": 0.93359375, "signal/frontier_coverage_25/group_std_mean": 0.3283530294895172, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0032674957066774367, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0032674957066774367, "signal/frontier_coverage_5/centered_abs_mean": 0.261399644613266, "signal/frontier_coverage_5/group_bin_occupancy": 0.93359375, "signal/frontier_coverage_5/group_std_mean": 0.3283530294895172, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0032674957066774367, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0032674957066774367, "signal/frontier_ece_reward/centered_abs_mean": 0.0648654729127884, "signal/frontier_ece_reward/group_bin_occupancy": 0.8375, "signal/frontier_ece_reward/group_std_mean": 0.096609228849411, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006486547738313675, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006486547738313675, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3109690427780151, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.76015625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.38899595737457277, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03109690472483635, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03109690472483635, "step": 45 }, { "calibration/aurc": 0.45887670957463256, "calibration/batch_distribution_entropy": 0.986413046557384, "calibration/batch_entropy_100bins": 0.9701950864389216, "calibration/batch_entropy_10bins": 0.986413046557384, "calibration/batch_entropy_50bins": 0.9786926740327588, "calibration/batch_uniqueness": 0.9566806171713684, "calibration/buffer_distribution_entropy": 0.94049635911715, "calibration/buffer_entropy_100bins": 0.8437552164763602, "calibration/buffer_entropy_10bins": 0.94049635911715, "calibration/buffer_entropy_50bins": 0.8891188471793313, "calibration/confidence_entropy": 0.5183146562190272, "calibration/coverage@0%": 0.000392156862745098, "calibration/coverage@1%": 0.000392156862745098, "calibration/coverage@10%": 0.000392156862745098, "calibration/coverage@15%": 0.000392156862745098, "calibration/coverage@20%": 0.005079656862745098, "calibration/coverage@25%": 0.017189031862745098, "calibration/coverage@30%": 0.025001531862745098, "calibration/coverage@5%": 0.000392156862745098, "calibration/ece": 0.16821106842628572, "calibration/mean_confidence": 0.5462577650957192, "calibration/prompt_uniqueness": 0.895401010503382, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 1131.4, "completions/max_terminated_length": 745.4, "completions/mean_length": 116.3083984375, "completions/mean_terminated_length": 115.8933090209961, "completions/min_length": 46.4, "completions/min_terminated_length": 46.4, "epoch": 0.16, "grad_norm": 0.0017517129890620708, "learning_rate": 1e-06, "loss": 0.0001, "num_tokens": 165668847.0, "reward": 0.8806891083717346, "reward_std": 0.1352734684944153, "rewards/accuracy_reward": 0.45361328125, "rewards/brier_reward": 0.7112634301185607, "rewards/confidence_uniqueness_reward": 0.9563660025596619, "rewards/format_reward": 0.9994140625, "rewards/frontier_aurc_reward": -0.0043065791018307206, "rewards/frontier_coverage_0": 0.06639667674899101, "rewards/frontier_coverage_1": 0.06639667674899101, "rewards/frontier_coverage_10": 0.06639667674899101, "rewards/frontier_coverage_15": 0.06639667674899101, "rewards/frontier_coverage_20": 0.06639667674899101, "rewards/frontier_coverage_25": 0.06639667674899101, "rewards/frontier_coverage_5": 0.06639667674899101, "rewards/frontier_ece_reward": 0.005921919783577323, "rewards/frontier_entropy_batch_reward": -0.18935585916042327, "signal/accuracy_reward/centered_abs_mean": 0.159417724609375, "signal/accuracy_reward/group_bin_occupancy": 0.1921875, "signal/accuracy_reward/group_std_mean": 0.20114850401878356, "signal/accuracy_reward/group_zero_std_frac": 0.4625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0797088623046875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0797088623046875, "signal/advantage_abs_mean": 0.10809851884841919, "signal/advantage_pre_scale_abs_mean": 0.10809851884841919, "signal/advantage_pre_scale_std": 0.1511505126953125, "signal/advantage_std": 0.1511505126953125, "signal/brier_reward/centered_abs_mean": 0.21221804320812226, "signal/brier_reward/group_bin_occupancy": 0.920703125, "signal/brier_reward/group_std_mean": 0.2602735161781311, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02122180461883545, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.02122180461883545, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.01266609001904726, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.947265625, "signal/confidence_uniqueness_reward/group_std_mean": 0.017678024619817732, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012666089925915003, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012666089925915003, "signal/format_reward/centered_abs_mean": 0.00113525390625, "signal/format_reward/group_bin_occupancy": 0.12734375, "signal/format_reward/group_std_mean": 0.0033145629335194827, "signal/format_reward/group_zero_std_frac": 0.98125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000567626953125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000567626953125, "signal/frontier_aurc_reward/centered_abs_mean": 0.002775211539119482, "signal/frontier_aurc_reward/group_bin_occupancy": 0.80078125, "signal/frontier_aurc_reward/group_std_mean": 0.0039797000586986545, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.4690145548665895e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.4690145548665895e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.2263072282075882, "signal/frontier_coverage_0/group_bin_occupancy": 0.920703125, "signal/frontier_coverage_0/group_std_mean": 0.2919350802898407, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0028288405854254963, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0028288405854254963, "signal/frontier_coverage_1/centered_abs_mean": 0.2263072282075882, "signal/frontier_coverage_1/group_bin_occupancy": 0.920703125, "signal/frontier_coverage_1/group_std_mean": 0.2919350802898407, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0028288405854254963, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0028288405854254963, "signal/frontier_coverage_10/centered_abs_mean": 0.2263072282075882, "signal/frontier_coverage_10/group_bin_occupancy": 0.920703125, "signal/frontier_coverage_10/group_std_mean": 0.2919350802898407, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0028288405854254963, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0028288405854254963, "signal/frontier_coverage_15/centered_abs_mean": 0.2263072282075882, "signal/frontier_coverage_15/group_bin_occupancy": 0.920703125, "signal/frontier_coverage_15/group_std_mean": 0.2919350802898407, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0028288405854254963, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0028288405854254963, "signal/frontier_coverage_20/centered_abs_mean": 0.2263072282075882, "signal/frontier_coverage_20/group_bin_occupancy": 0.920703125, "signal/frontier_coverage_20/group_std_mean": 0.2919350802898407, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0028288405854254963, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0028288405854254963, "signal/frontier_coverage_25/centered_abs_mean": 0.2263072282075882, "signal/frontier_coverage_25/group_bin_occupancy": 0.920703125, "signal/frontier_coverage_25/group_std_mean": 0.2919350802898407, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0028288405854254963, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0028288405854254963, "signal/frontier_coverage_5/centered_abs_mean": 0.2263072282075882, "signal/frontier_coverage_5/group_bin_occupancy": 0.920703125, "signal/frontier_coverage_5/group_std_mean": 0.2919350802898407, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0028288405854254963, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0028288405854254963, "signal/frontier_ece_reward/centered_abs_mean": 0.06984314173460007, "signal/frontier_ece_reward/group_bin_occupancy": 0.89296875, "signal/frontier_ece_reward/group_std_mean": 0.0957074835896492, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006984313949942589, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006984313949942589, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2799877405166626, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.755078125, "signal/frontier_entropy_batch_reward/group_std_mean": 0.35995004177093504, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.027998774126172066, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027998774126172066, "step": 50 }, { "epoch": 0.16, "eval_calibration/aurc": 0.6514437234622675, "eval_calibration/batch_distribution_entropy": 0.9300585720206449, "eval_calibration/batch_entropy_100bins": 0.6905406002692105, "eval_calibration/batch_entropy_10bins": 0.9300585720206449, "eval_calibration/batch_entropy_50bins": 0.7765363996158673, "eval_calibration/batch_uniqueness": 0.8994140625, "eval_calibration/buffer_distribution_entropy": 0.9462761211614454, "eval_calibration/buffer_entropy_100bins": 0.8623138320227769, "eval_calibration/buffer_entropy_10bins": 0.9462761211614454, "eval_calibration/buffer_entropy_50bins": 0.9029670207624898, "eval_calibration/confidence_entropy": 0.5075045392400461, "eval_calibration/coverage@0%": 0.0078125, "eval_calibration/coverage@1%": 0.0078125, "eval_calibration/coverage@10%": 0.0078125, "eval_calibration/coverage@15%": 0.0078125, "eval_calibration/coverage@20%": 0.0078125, "eval_calibration/coverage@25%": 0.0078125, "eval_calibration/coverage@30%": 0.0078125, "eval_calibration/coverage@5%": 0.0078125, "eval_calibration/ece": 0.310032909152462, "eval_calibration/mean_confidence": 0.5270542874048246, "eval_calibration/prompt_uniqueness": 0.8994140625, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 294.75, "eval_completions/max_terminated_length": 294.75, "eval_completions/mean_length": 126.42483901977539, "eval_completions/mean_terminated_length": 126.42483901977539, "eval_completions/min_length": 61.5, "eval_completions/min_terminated_length": 61.5, "eval_loss": 0.0, "eval_num_tokens": 165668847.0, "eval_reward": 0.7603507339954376, "eval_reward_std": 0.23446262627840042, "eval_rewards/accuracy_reward": 0.3828125, "eval_rewards/brier_reward": 0.6980591118335724, "eval_rewards/confidence_uniqueness_reward": 0.898681640625, "eval_rewards/format_reward": 1.0, "eval_rewards/frontier_aurc_reward": -0.004815749707631767, "eval_rewards/frontier_coverage_0": 0.10749666392803192, "eval_rewards/frontier_coverage_1": 0.10749666392803192, "eval_rewards/frontier_coverage_10": 0.10749666392803192, "eval_rewards/frontier_coverage_15": 0.10749666392803192, "eval_rewards/frontier_coverage_20": 0.10749666392803192, "eval_rewards/frontier_coverage_25": 0.10749666392803192, "eval_rewards/frontier_coverage_5": 0.10749666392803192, "eval_rewards/frontier_ece_reward": -0.0007533840253017843, "eval_rewards/frontier_entropy_batch_reward": -1.0, "eval_runtime": 17.0809, "eval_samples_per_second": 29.272, "eval_signal/accuracy_reward/centered_abs_mean": 0.45849609375, "eval_signal/accuracy_reward/group_bin_occupancy": 0.25, "eval_signal/accuracy_reward/group_std_mean": 0.485101580619812, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.229248046875, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.229248046875, "eval_signal/advantage_abs_mean": 0.21446801349520683, "eval_signal/advantage_pre_scale_abs_mean": 0.21446801349520683, "eval_signal/advantage_pre_scale_std": 0.23238081485033035, "eval_signal/advantage_std": 0.23238081485033035, "eval_signal/brier_reward/centered_abs_mean": 0.23739226162433624, "eval_signal/brier_reward/group_bin_occupancy": 0.96875, "eval_signal/brier_reward/group_std_mean": 0.28240957856178284, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02373922662809491, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.02373922662809491, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0388031005859375, "eval_signal/confidence_uniqueness_reward/group_bin_occupancy": 0.390625, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.04573572054505348, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0038803101051598787, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0038803101051598787, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_bin_occupancy": 0.125, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.003774499346036464, "eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.859375, "eval_signal/frontier_aurc_reward/group_std_mean": 0.0055051157251000404, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.7181244553939905e-05, "eval_signal/frontier_aurc_reward/weight": 0.012500000186264515, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.7181244553939905e-05, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.3006228432059288, "eval_signal/frontier_coverage_0/group_bin_occupancy": 0.9609375, "eval_signal/frontier_coverage_0/group_std_mean": 0.4013464003801346, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0037577852490358055, "eval_signal/frontier_coverage_0/weight": 0.012500000186264515, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0037577852490358055, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.3006228432059288, "eval_signal/frontier_coverage_1/group_bin_occupancy": 0.9609375, "eval_signal/frontier_coverage_1/group_std_mean": 0.4013464003801346, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0037577852490358055, "eval_signal/frontier_coverage_1/weight": 0.012500000186264515, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0037577852490358055, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.3006228432059288, "eval_signal/frontier_coverage_10/group_bin_occupancy": 0.9609375, "eval_signal/frontier_coverage_10/group_std_mean": 0.4013464003801346, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0037577852490358055, "eval_signal/frontier_coverage_10/weight": 0.012500000186264515, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0037577852490358055, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.3006228432059288, "eval_signal/frontier_coverage_15/group_bin_occupancy": 0.9609375, "eval_signal/frontier_coverage_15/group_std_mean": 0.4013464003801346, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0037577852490358055, "eval_signal/frontier_coverage_15/weight": 0.012500000186264515, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0037577852490358055, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.3006228432059288, "eval_signal/frontier_coverage_20/group_bin_occupancy": 0.9609375, "eval_signal/frontier_coverage_20/group_std_mean": 0.4013464003801346, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0037577852490358055, "eval_signal/frontier_coverage_20/weight": 0.012500000186264515, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0037577852490358055, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.3006228432059288, "eval_signal/frontier_coverage_25/group_bin_occupancy": 0.9609375, "eval_signal/frontier_coverage_25/group_std_mean": 0.4013464003801346, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0037577852490358055, "eval_signal/frontier_coverage_25/weight": 0.012500000186264515, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0037577852490358055, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.3006228432059288, "eval_signal/frontier_coverage_5/group_bin_occupancy": 0.9609375, "eval_signal/frontier_coverage_5/group_std_mean": 0.4013464003801346, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0037577852490358055, "eval_signal/frontier_coverage_5/weight": 0.012500000186264515, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0037577852490358055, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.07121825404465199, "eval_signal/frontier_ece_reward/group_bin_occupancy": 0.8984375, "eval_signal/frontier_ece_reward/group_std_mean": 0.10425052046775818, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.007121825474314392, "eval_signal/frontier_ece_reward/weight": 0.10000000149011612, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.007121825474314392, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.125, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0, "eval_steps_per_second": 0.234, "step": 50 }, { "calibration/aurc": 0.41083644913153056, "calibration/batch_distribution_entropy": 0.9935570906004912, "calibration/batch_entropy_100bins": 0.9715693910526587, "calibration/batch_entropy_10bins": 0.9935570906004912, "calibration/batch_entropy_50bins": 0.9855893029338851, "calibration/batch_uniqueness": 0.9587646484375, "calibration/buffer_distribution_entropy": 0.9504008284813109, "calibration/buffer_entropy_100bins": 0.8723124863154184, "calibration/buffer_entropy_10bins": 0.9504008284813109, "calibration/buffer_entropy_50bins": 0.9103123756016671, "calibration/confidence_entropy": 0.4926616876541866, "calibration/coverage@0%": 0.00078125, "calibration/coverage@1%": 0.00078125, "calibration/coverage@10%": 0.00078125, "calibration/coverage@15%": 0.00078125, "calibration/coverage@20%": 0.014453125, "calibration/coverage@25%": 0.027734375, "calibration/coverage@30%": 0.1140625, "calibration/coverage@5%": 0.00078125, "calibration/ece": 0.19613354282701326, "calibration/mean_confidence": 0.5161718102828323, "calibration/prompt_uniqueness": 0.89189453125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 873.2, "completions/max_terminated_length": 439.2, "completions/mean_length": 132.3017578125, "completions/mean_terminated_length": 132.02688598632812, "completions/min_length": 53.8, "completions/min_terminated_length": 53.8, "epoch": 0.176, "grad_norm": 0.0015510269440710545, "learning_rate": 1e-06, "loss": 0.0003, "num_tokens": 182260737.0, "reward": 0.8931734323501587, "reward_std": 0.12140908688306809, "rewards/accuracy_reward": 0.46357421875, "rewards/brier_reward": 0.7294286847114563, "rewards/confidence_uniqueness_reward": 0.9577026724815368, "rewards/format_reward": 0.9994140625, "rewards/frontier_aurc_reward": -0.003854288347065449, "rewards/frontier_coverage_0": 0.09462544620037079, "rewards/frontier_coverage_1": 0.09462544620037079, "rewards/frontier_coverage_10": 0.09462544620037079, "rewards/frontier_coverage_15": 0.09462544620037079, "rewards/frontier_coverage_20": 0.09462544620037079, "rewards/frontier_coverage_25": 0.09462544620037079, "rewards/frontier_coverage_5": 0.09462544620037079, "rewards/frontier_ece_reward": 0.013301673159003258, "rewards/frontier_entropy_batch_reward": -0.1659554123878479, "signal/accuracy_reward/centered_abs_mean": 0.140850830078125, "signal/accuracy_reward/group_bin_occupancy": 0.188671875, "signal/accuracy_reward/group_std_mean": 0.18363622725009918, "signal/accuracy_reward/group_zero_std_frac": 0.490625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0704254150390625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0704254150390625, "signal/advantage_abs_mean": 0.09490404278039932, "signal/advantage_pre_scale_abs_mean": 0.09490404278039932, "signal/advantage_pre_scale_std": 0.13685409128665924, "signal/advantage_std": 0.13685409128665924, "signal/brier_reward/centered_abs_mean": 0.20794688463211058, "signal/brier_reward/group_bin_occupancy": 0.905078125, "signal/brier_reward/group_std_mean": 0.25734142661094667, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020794688165187834, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.020794688165187834, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.011929828859865665, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.93515625, "signal/confidence_uniqueness_reward/group_std_mean": 0.016922668367624284, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0011929828440770506, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0011929828440770506, "signal/format_reward/centered_abs_mean": 0.00113525390625, "signal/format_reward/group_bin_occupancy": 0.12734375, "signal/format_reward/group_std_mean": 0.0033145629335194827, "signal/format_reward/group_zero_std_frac": 0.98125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000567626953125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000567626953125, "signal/frontier_aurc_reward/centered_abs_mean": 0.002570530725643039, "signal/frontier_aurc_reward/group_bin_occupancy": 0.776953125, "signal/frontier_aurc_reward/group_std_mean": 0.0038315205834805965, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.213163508917205e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.213163508917205e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.25130972266197205, "signal/frontier_coverage_0/group_bin_occupancy": 0.9203125, "signal/frontier_coverage_0/group_std_mean": 0.3175831615924835, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003141371626406908, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003141371626406908, "signal/frontier_coverage_1/centered_abs_mean": 0.25130972266197205, "signal/frontier_coverage_1/group_bin_occupancy": 0.9203125, "signal/frontier_coverage_1/group_std_mean": 0.3175831615924835, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003141371626406908, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003141371626406908, "signal/frontier_coverage_10/centered_abs_mean": 0.25130972266197205, "signal/frontier_coverage_10/group_bin_occupancy": 0.9203125, "signal/frontier_coverage_10/group_std_mean": 0.3175831615924835, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003141371626406908, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003141371626406908, "signal/frontier_coverage_15/centered_abs_mean": 0.25130972266197205, "signal/frontier_coverage_15/group_bin_occupancy": 0.9203125, "signal/frontier_coverage_15/group_std_mean": 0.3175831615924835, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003141371626406908, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003141371626406908, "signal/frontier_coverage_20/centered_abs_mean": 0.25130972266197205, "signal/frontier_coverage_20/group_bin_occupancy": 0.9203125, "signal/frontier_coverage_20/group_std_mean": 0.3175831615924835, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003141371626406908, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003141371626406908, "signal/frontier_coverage_25/centered_abs_mean": 0.25130972266197205, "signal/frontier_coverage_25/group_bin_occupancy": 0.9203125, "signal/frontier_coverage_25/group_std_mean": 0.3175831615924835, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003141371626406908, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003141371626406908, "signal/frontier_coverage_5/centered_abs_mean": 0.25130972266197205, "signal/frontier_coverage_5/group_bin_occupancy": 0.9203125, "signal/frontier_coverage_5/group_std_mean": 0.3175831615924835, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003141371626406908, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003141371626406908, "signal/frontier_ece_reward/centered_abs_mean": 0.06303459852933883, "signal/frontier_ece_reward/group_bin_occupancy": 0.8765625, "signal/frontier_ece_reward/group_std_mean": 0.08597700744867325, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006303459964692592, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006303459964692592, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2539799213409424, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.76171875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.33189951777458193, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02539799325168133, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02539799325168133, "step": 55 }, { "calibration/aurc": 0.34942118001298506, "calibration/batch_distribution_entropy": 0.9799110967706934, "calibration/batch_entropy_100bins": 0.9610832692207, "calibration/batch_entropy_10bins": 0.9799110967706934, "calibration/batch_entropy_50bins": 0.9736960127197387, "calibration/batch_uniqueness": 0.9573197846138358, "calibration/buffer_distribution_entropy": 0.9584469854664572, "calibration/buffer_entropy_100bins": 0.8916949290759867, "calibration/buffer_entropy_10bins": 0.9584469854664572, "calibration/buffer_entropy_50bins": 0.9246897994577994, "calibration/confidence_entropy": 0.46040453975152423, "calibration/coverage@0%": 0.010939028864970645, "calibration/coverage@1%": 0.010939028864970645, "calibration/coverage@10%": 0.014454653864970645, "calibration/coverage@15%": 0.01915132705479452, "calibration/coverage@20%": 0.10548938967710372, "calibration/coverage@25%": 0.20474865459882582, "calibration/coverage@30%": 0.3540063906555773, "calibration/coverage@5%": 0.010939028864970645, "calibration/ece": 0.14488413962169583, "calibration/mean_confidence": 0.4789294839135028, "calibration/prompt_uniqueness": 0.8839207612513007, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 870.8, "completions/max_terminated_length": 420.6, "completions/mean_length": 143.64931640625, "completions/mean_terminated_length": 143.24107360839844, "completions/min_length": 60.6, "completions/min_terminated_length": 60.6, "epoch": 0.192, "grad_norm": 0.0014950234908610582, "learning_rate": 1e-06, "loss": 0.0001, "num_tokens": 198546522.0, "reward": 0.9052001118659974, "reward_std": 0.11826727986335754, "rewards/accuracy_reward": 0.491796875, "rewards/brier_reward": 0.7406868457794189, "rewards/confidence_uniqueness_reward": 0.9558995842933655, "rewards/format_reward": 0.99931640625, "rewards/frontier_aurc_reward": -0.0034085330553352833, "rewards/frontier_coverage_0": 0.09948756024241448, "rewards/frontier_coverage_1": 0.09948756024241448, "rewards/frontier_coverage_10": 0.09948756024241448, "rewards/frontier_coverage_15": 0.09948756024241448, "rewards/frontier_coverage_20": 0.09948756024241448, "rewards/frontier_coverage_25": 0.09948756024241448, "rewards/frontier_coverage_5": 0.09948756024241448, "rewards/frontier_ece_reward": 0.01857722718268633, "rewards/frontier_entropy_batch_reward": -0.20535460412502288, "signal/accuracy_reward/centered_abs_mean": 0.13638916015625, "signal/accuracy_reward/group_bin_occupancy": 0.18984375, "signal/accuracy_reward/group_std_mean": 0.18098436594009398, "signal/accuracy_reward/group_zero_std_frac": 0.48125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.068194580078125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.068194580078125, "signal/advantage_abs_mean": 0.0910866379737854, "signal/advantage_pre_scale_abs_mean": 0.0910866379737854, "signal/advantage_pre_scale_std": 0.1333424761891365, "signal/advantage_std": 0.1333424761891365, "signal/brier_reward/centered_abs_mean": 0.2078978717327118, "signal/brier_reward/group_bin_occupancy": 0.876953125, "signal/brier_reward/group_std_mean": 0.2592237114906311, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020789787545800208, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.020789787545800208, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.015980724617838858, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.897265625, "signal/confidence_uniqueness_reward/group_std_mean": 0.02260695695877075, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0015980724710971117, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0015980724710971117, "signal/format_reward/centered_abs_mean": 0.001324462890625, "signal/format_reward/group_bin_occupancy": 0.127734375, "signal/format_reward/group_std_mean": 0.003866990189999342, "signal/format_reward/group_zero_std_frac": 0.978125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006622314453125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0006622314453125, "signal/frontier_aurc_reward/centered_abs_mean": 0.002407692139968276, "signal/frontier_aurc_reward/group_bin_occupancy": 0.776171875, "signal/frontier_aurc_reward/group_std_mean": 0.00357803120277822, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.009615102200769e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.009615102200769e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.262815922498703, "signal/frontier_coverage_0/group_bin_occupancy": 0.908203125, "signal/frontier_coverage_0/group_std_mean": 0.33183927536010743, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003285199077799916, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003285199077799916, "signal/frontier_coverage_1/centered_abs_mean": 0.262815922498703, "signal/frontier_coverage_1/group_bin_occupancy": 0.908203125, "signal/frontier_coverage_1/group_std_mean": 0.33183927536010743, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003285199077799916, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003285199077799916, "signal/frontier_coverage_10/centered_abs_mean": 0.262815922498703, "signal/frontier_coverage_10/group_bin_occupancy": 0.908203125, "signal/frontier_coverage_10/group_std_mean": 0.33183927536010743, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003285199077799916, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003285199077799916, "signal/frontier_coverage_15/centered_abs_mean": 0.262815922498703, "signal/frontier_coverage_15/group_bin_occupancy": 0.908203125, "signal/frontier_coverage_15/group_std_mean": 0.33183927536010743, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003285199077799916, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003285199077799916, "signal/frontier_coverage_20/centered_abs_mean": 0.262815922498703, "signal/frontier_coverage_20/group_bin_occupancy": 0.908203125, "signal/frontier_coverage_20/group_std_mean": 0.33183927536010743, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003285199077799916, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003285199077799916, "signal/frontier_coverage_25/centered_abs_mean": 0.262815922498703, "signal/frontier_coverage_25/group_bin_occupancy": 0.908203125, "signal/frontier_coverage_25/group_std_mean": 0.33183927536010743, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003285199077799916, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003285199077799916, "signal/frontier_coverage_5/centered_abs_mean": 0.262815922498703, "signal/frontier_coverage_5/group_bin_occupancy": 0.908203125, "signal/frontier_coverage_5/group_std_mean": 0.33183927536010743, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003285199077799916, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003285199077799916, "signal/frontier_ece_reward/centered_abs_mean": 0.057891517877578735, "signal/frontier_ece_reward/group_bin_occupancy": 0.860546875, "signal/frontier_ece_reward/group_std_mean": 0.07930349558591843, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005789151694625616, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005789151694625616, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3005888402462006, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.749609375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.379769903421402, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.030058884248137473, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.030058884248137473, "step": 60 }, { "calibration/aurc": 0.2947941744415761, "calibration/batch_distribution_entropy": 0.9785599217065368, "calibration/batch_entropy_100bins": 0.9581661507133953, "calibration/batch_entropy_10bins": 0.9785599217065368, "calibration/batch_entropy_50bins": 0.9720473639947895, "calibration/batch_uniqueness": 0.9595977783203125, "calibration/buffer_distribution_entropy": 0.963559167866169, "calibration/buffer_entropy_100bins": 0.906266340287414, "calibration/buffer_entropy_10bins": 0.963559167866169, "calibration/buffer_entropy_50bins": 0.9352872608533884, "calibration/confidence_entropy": 0.47836497278573387, "calibration/coverage@0%": 0.012109375, "calibration/coverage@1%": 0.012109375, "calibration/coverage@10%": 0.0625, "calibration/coverage@15%": 0.185546875, "calibration/coverage@20%": 0.290234375, "calibration/coverage@25%": 0.455078125, "calibration/coverage@30%": 0.572265625, "calibration/coverage@5%": 0.021484375, "calibration/ece": 0.15920833551919994, "calibration/mean_confidence": 0.5314152702200673, "calibration/prompt_uniqueness": 0.88876953125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 461.8, "completions/max_terminated_length": 461.8, "completions/mean_length": 155.57880859375, "completions/mean_terminated_length": 155.57880859375, "completions/min_length": 65.8, "completions/min_terminated_length": 65.8, "epoch": 0.208, "grad_norm": 0.0011343832593411207, "learning_rate": 1e-06, "loss": 0.0003, "num_tokens": 215171873.0, "reward": 0.9295921802520752, "reward_std": 0.11272455304861069, "rewards/accuracy_reward": 0.5361328125, "rewards/brier_reward": 0.753898274898529, "rewards/confidence_uniqueness_reward": 0.960012423992157, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.003036556579172611, "rewards/frontier_coverage_0": 0.07245685756206513, "rewards/frontier_coverage_1": 0.07245685756206513, "rewards/frontier_coverage_10": 0.07245685756206513, "rewards/frontier_coverage_15": 0.07245685756206513, "rewards/frontier_coverage_20": 0.07245685756206513, "rewards/frontier_coverage_25": 0.07245685756206513, "rewards/frontier_coverage_5": 0.07245685756206513, "rewards/frontier_ece_reward": 0.022372994944453238, "rewards/frontier_entropy_batch_reward": -0.18355790972709657, "signal/accuracy_reward/centered_abs_mean": 0.13197021484375, "signal/accuracy_reward/group_bin_occupancy": 0.187890625, "signal/accuracy_reward/group_std_mean": 0.17548914551734923, "signal/accuracy_reward/group_zero_std_frac": 0.496875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.065985107421875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.065985107421875, "signal/advantage_abs_mean": 0.08748974055051803, "signal/advantage_pre_scale_abs_mean": 0.08748974055051803, "signal/advantage_pre_scale_std": 0.1294364556670189, "signal/advantage_std": 0.1294364556670189, "signal/brier_reward/centered_abs_mean": 0.1890464246273041, "signal/brier_reward/group_bin_occupancy": 0.86875, "signal/brier_reward/group_std_mean": 0.23756815493106842, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0189046423882246, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.0189046423882246, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012290091067552567, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.928125, "signal/confidence_uniqueness_reward/group_std_mean": 0.015977666527032853, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012290091253817081, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012290091253817081, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_bin_occupancy": 0.125390625, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.0024446202907711266, "signal/frontier_aurc_reward/group_bin_occupancy": 0.78046875, "signal/frontier_aurc_reward/group_std_mean": 0.003613197011873126, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.055775378015824e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.055775378015824e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.22794330716133118, "signal/frontier_coverage_0/group_bin_occupancy": 0.8890625, "signal/frontier_coverage_0/group_std_mean": 0.2930518627166748, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00284929140470922, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00284929140470922, "signal/frontier_coverage_1/centered_abs_mean": 0.22794330716133118, "signal/frontier_coverage_1/group_bin_occupancy": 0.8890625, "signal/frontier_coverage_1/group_std_mean": 0.2930518627166748, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00284929140470922, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00284929140470922, "signal/frontier_coverage_10/centered_abs_mean": 0.22794330716133118, "signal/frontier_coverage_10/group_bin_occupancy": 0.8890625, "signal/frontier_coverage_10/group_std_mean": 0.2930518627166748, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00284929140470922, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00284929140470922, "signal/frontier_coverage_15/centered_abs_mean": 0.22794330716133118, "signal/frontier_coverage_15/group_bin_occupancy": 0.8890625, "signal/frontier_coverage_15/group_std_mean": 0.2930518627166748, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00284929140470922, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00284929140470922, "signal/frontier_coverage_20/centered_abs_mean": 0.22794330716133118, "signal/frontier_coverage_20/group_bin_occupancy": 0.8890625, "signal/frontier_coverage_20/group_std_mean": 0.2930518627166748, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00284929140470922, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00284929140470922, "signal/frontier_coverage_25/centered_abs_mean": 0.22794330716133118, "signal/frontier_coverage_25/group_bin_occupancy": 0.8890625, "signal/frontier_coverage_25/group_std_mean": 0.2930518627166748, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00284929140470922, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00284929140470922, "signal/frontier_coverage_5/centered_abs_mean": 0.22794330716133118, "signal/frontier_coverage_5/group_bin_occupancy": 0.8890625, "signal/frontier_coverage_5/group_std_mean": 0.2930518627166748, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00284929140470922, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00284929140470922, "signal/frontier_ece_reward/centered_abs_mean": 0.0545510284602642, "signal/frontier_ece_reward/group_bin_occupancy": 0.84921875, "signal/frontier_ece_reward/group_std_mean": 0.07529444098472596, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00545510295778513, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00545510295778513, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27159354090690613, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.753515625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3504547536373138, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.027159354835748672, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027159354835748672, "step": 65 }, { "calibration/aurc": 0.3189934672338976, "calibration/batch_distribution_entropy": 0.9939897166524112, "calibration/batch_entropy_100bins": 0.967330341286187, "calibration/batch_entropy_10bins": 0.9939897166524112, "calibration/batch_entropy_50bins": 0.9831548626039949, "calibration/batch_uniqueness": 0.9600825121686686, "calibration/buffer_distribution_entropy": 0.9681576729442384, "calibration/buffer_entropy_100bins": 0.9180615027527184, "calibration/buffer_entropy_10bins": 0.9681576729442384, "calibration/buffer_entropy_50bins": 0.9437593307933965, "calibration/confidence_entropy": 0.4933469731122976, "calibration/coverage@0%": 0.00390625, "calibration/coverage@1%": 0.00390625, "calibration/coverage@10%": 0.09296875, "calibration/coverage@15%": 0.2039675245098039, "calibration/coverage@20%": 0.29543045343137253, "calibration/coverage@25%": 0.4104151348039215, "calibration/coverage@30%": 0.5085263480392157, "calibration/coverage@5%": 0.00390625, "calibration/ece": 0.15230069879851205, "calibration/mean_confidence": 0.5016724122037817, "calibration/prompt_uniqueness": 0.885172063334634, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 1095.8, "completions/max_terminated_length": 681.2, "completions/mean_length": 163.984765625, "completions/mean_terminated_length": 163.58394470214844, "completions/min_length": 66.8, "completions/min_terminated_length": 66.8, "epoch": 0.224, "grad_norm": 0.0011190164368599653, "learning_rate": 1e-06, "loss": 0.0013, "num_tokens": 232004261.0, "reward": 0.9104782462120056, "reward_std": 0.10773791372776031, "rewards/accuracy_reward": 0.48671875, "rewards/brier_reward": 0.7708696365356446, "rewards/confidence_uniqueness_reward": 0.9604137420654297, "rewards/format_reward": 0.999609375, "rewards/frontier_aurc_reward": -0.003139969985932112, "rewards/frontier_coverage_0": 0.11844078451395035, "rewards/frontier_coverage_1": 0.11844078451395035, "rewards/frontier_coverage_10": 0.11844078451395035, "rewards/frontier_coverage_15": 0.11844078451395035, "rewards/frontier_coverage_20": 0.11844078451395035, "rewards/frontier_coverage_25": 0.11844078451395035, "rewards/frontier_coverage_5": 0.11844078451395035, "rewards/frontier_ece_reward": 0.021617041900753975, "rewards/frontier_entropy_batch_reward": -0.18300187289714814, "signal/accuracy_reward/centered_abs_mean": 0.1204345703125, "signal/accuracy_reward/group_bin_occupancy": 0.182421875, "signal/accuracy_reward/group_std_mean": 0.15962167084217072, "signal/accuracy_reward/group_zero_std_frac": 0.540625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06021728515625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06021728515625, "signal/advantage_abs_mean": 0.08397592157125473, "signal/advantage_pre_scale_abs_mean": 0.08397592157125473, "signal/advantage_pre_scale_std": 0.1261191889643669, "signal/advantage_std": 0.1261191889643669, "signal/brier_reward/centered_abs_mean": 0.17388453483581542, "signal/brier_reward/group_bin_occupancy": 0.8765625, "signal/brier_reward/group_std_mean": 0.219650474190712, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01738845370709896, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01738845370709896, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012389418855309486, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.9328125, "signal/confidence_uniqueness_reward/group_std_mean": 0.01659379303455353, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012389418901875616, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012389418901875616, "signal/format_reward/centered_abs_mean": 0.00074462890625, "signal/format_reward/group_bin_occupancy": 0.126171875, "signal/format_reward/group_std_mean": 0.0018734002020210027, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000372314453125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000372314453125, "signal/frontier_aurc_reward/centered_abs_mean": 0.0023782884702086447, "signal/frontier_aurc_reward/group_bin_occupancy": 0.7796875, "signal/frontier_aurc_reward/group_std_mean": 0.003504908038303256, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.972860493173357e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.972860493173357e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.21117229461669923, "signal/frontier_coverage_0/group_bin_occupancy": 0.891015625, "signal/frontier_coverage_0/group_std_mean": 0.27140182852745054, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0026396537199616433, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0026396537199616433, "signal/frontier_coverage_1/centered_abs_mean": 0.21117229461669923, "signal/frontier_coverage_1/group_bin_occupancy": 0.891015625, "signal/frontier_coverage_1/group_std_mean": 0.27140182852745054, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026396537199616433, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026396537199616433, "signal/frontier_coverage_10/centered_abs_mean": 0.21117229461669923, "signal/frontier_coverage_10/group_bin_occupancy": 0.891015625, "signal/frontier_coverage_10/group_std_mean": 0.27140182852745054, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026396537199616433, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026396537199616433, "signal/frontier_coverage_15/centered_abs_mean": 0.21117229461669923, "signal/frontier_coverage_15/group_bin_occupancy": 0.891015625, "signal/frontier_coverage_15/group_std_mean": 0.27140182852745054, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0026396537199616433, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026396537199616433, "signal/frontier_coverage_20/centered_abs_mean": 0.21117229461669923, "signal/frontier_coverage_20/group_bin_occupancy": 0.891015625, "signal/frontier_coverage_20/group_std_mean": 0.27140182852745054, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0026396537199616433, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0026396537199616433, "signal/frontier_coverage_25/centered_abs_mean": 0.21117229461669923, "signal/frontier_coverage_25/group_bin_occupancy": 0.891015625, "signal/frontier_coverage_25/group_std_mean": 0.27140182852745054, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0026396537199616433, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0026396537199616433, "signal/frontier_coverage_5/centered_abs_mean": 0.21117229461669923, "signal/frontier_coverage_5/group_bin_occupancy": 0.891015625, "signal/frontier_coverage_5/group_std_mean": 0.27140182852745054, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026396537199616433, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026396537199616433, "signal/frontier_ece_reward/centered_abs_mean": 0.04839524030685425, "signal/frontier_ece_reward/group_bin_occupancy": 0.842578125, "signal/frontier_ece_reward/group_std_mean": 0.06754831522703171, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004839524254202842, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004839524254202842, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2688676655292511, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.75234375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.34602165818214414, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026886767894029617, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026886767894029617, "step": 70 }, { "calibration/aurc": 0.3722649127966441, "calibration/batch_distribution_entropy": 0.9747995210345157, "calibration/batch_entropy_100bins": 0.9571055886452392, "calibration/batch_entropy_10bins": 0.9747995210345157, "calibration/batch_entropy_50bins": 0.9714064992183363, "calibration/batch_uniqueness": 0.9584098953076943, "calibration/buffer_distribution_entropy": 0.9721121659408224, "calibration/buffer_entropy_100bins": 0.9276944237884651, "calibration/buffer_entropy_10bins": 0.9721121659408224, "calibration/buffer_entropy_50bins": 0.9509444674613132, "calibration/confidence_entropy": 0.4904281243945256, "calibration/coverage@0%": 0.000390625, "calibration/coverage@1%": 0.000390625, "calibration/coverage@10%": 0.078125, "calibration/coverage@15%": 0.144921875, "calibration/coverage@20%": 0.20859375, "calibration/coverage@25%": 0.257421875, "calibration/coverage@30%": 0.278515625, "calibration/coverage@5%": 0.036328125, "calibration/ece": 0.17131989400604017, "calibration/mean_confidence": 0.5100639151774843, "calibration/prompt_uniqueness": 0.8860584270941727, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00048828125, "completions/max_length": 1245.6, "completions/max_terminated_length": 700.6, "completions/mean_length": 166.92529296875, "completions/mean_terminated_length": 166.2565490722656, "completions/min_length": 68.4, "completions/min_terminated_length": 68.4, "epoch": 0.24, "grad_norm": 0.0012799223186448216, "learning_rate": 1e-06, "loss": 0.0017, "num_tokens": 248965256.0, "reward": 0.9279002666473388, "reward_std": 0.11562621295452118, "rewards/accuracy_reward": 0.54033203125, "rewards/brier_reward": 0.752100133895874, "rewards/confidence_uniqueness_reward": 0.9600031733512878, "rewards/format_reward": 0.9994140625, "rewards/frontier_aurc_reward": -0.0029632408171892167, "rewards/frontier_coverage_0": 0.06165201477706432, "rewards/frontier_coverage_1": 0.06165201477706432, "rewards/frontier_coverage_10": 0.06165201477706432, "rewards/frontier_coverage_15": 0.06165201477706432, "rewards/frontier_coverage_20": 0.06165201477706432, "rewards/frontier_coverage_25": 0.06165201477706432, "rewards/frontier_coverage_5": 0.06165201477706432, "rewards/frontier_ece_reward": 0.018022438511252403, "rewards/frontier_entropy_batch_reward": -0.2034287005662918, "signal/accuracy_reward/centered_abs_mean": 0.137420654296875, "signal/accuracy_reward/group_bin_occupancy": 0.18828125, "signal/accuracy_reward/group_std_mean": 0.1800607681274414, "signal/accuracy_reward/group_zero_std_frac": 0.49375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0687103271484375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0687103271484375, "signal/advantage_abs_mean": 0.0900656446814537, "signal/advantage_pre_scale_abs_mean": 0.0900656446814537, "signal/advantage_pre_scale_std": 0.13366247713565826, "signal/advantage_std": 0.13366247713565826, "signal/brier_reward/centered_abs_mean": 0.17995524406433105, "signal/brier_reward/group_bin_occupancy": 0.87421875, "signal/brier_reward/group_std_mean": 0.2263825535774231, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017995523661375044, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.017995523661375044, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012892700731754303, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.92578125, "signal/confidence_uniqueness_reward/group_std_mean": 0.018097008019685744, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012892701663076878, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012892701663076878, "signal/format_reward/centered_abs_mean": 0.00113525390625, "signal/format_reward/group_bin_occupancy": 0.12734375, "signal/format_reward/group_std_mean": 0.0033145629800856113, "signal/format_reward/group_zero_std_frac": 0.98125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000567626953125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000567626953125, "signal/frontier_aurc_reward/centered_abs_mean": 0.002403355622664094, "signal/frontier_aurc_reward/group_bin_occupancy": 0.784375, "signal/frontier_aurc_reward/group_std_mean": 0.0035244593862444164, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.0041945865377784e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.0041945865377784e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.21193841695785523, "signal/frontier_coverage_0/group_bin_occupancy": 0.8953125, "signal/frontier_coverage_0/group_std_mean": 0.2735629081726074, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0026492302305996416, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0026492302305996416, "signal/frontier_coverage_1/centered_abs_mean": 0.21193841695785523, "signal/frontier_coverage_1/group_bin_occupancy": 0.8953125, "signal/frontier_coverage_1/group_std_mean": 0.2735629081726074, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026492302305996416, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026492302305996416, "signal/frontier_coverage_10/centered_abs_mean": 0.21193841695785523, "signal/frontier_coverage_10/group_bin_occupancy": 0.8953125, "signal/frontier_coverage_10/group_std_mean": 0.2735629081726074, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026492302305996416, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026492302305996416, "signal/frontier_coverage_15/centered_abs_mean": 0.21193841695785523, "signal/frontier_coverage_15/group_bin_occupancy": 0.8953125, "signal/frontier_coverage_15/group_std_mean": 0.2735629081726074, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0026492302305996416, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026492302305996416, "signal/frontier_coverage_20/centered_abs_mean": 0.21193841695785523, "signal/frontier_coverage_20/group_bin_occupancy": 0.8953125, "signal/frontier_coverage_20/group_std_mean": 0.2735629081726074, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0026492302305996416, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0026492302305996416, "signal/frontier_coverage_25/centered_abs_mean": 0.21193841695785523, "signal/frontier_coverage_25/group_bin_occupancy": 0.8953125, "signal/frontier_coverage_25/group_std_mean": 0.2735629081726074, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0026492302305996416, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0026492302305996416, "signal/frontier_coverage_5/centered_abs_mean": 0.21193841695785523, "signal/frontier_coverage_5/group_bin_occupancy": 0.8953125, "signal/frontier_coverage_5/group_std_mean": 0.2735629081726074, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026492302305996416, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026492302305996416, "signal/frontier_ece_reward/centered_abs_mean": 0.04577092379331589, "signal/frontier_ece_reward/group_bin_occupancy": 0.835546875, "signal/frontier_ece_reward/group_std_mean": 0.06537232622504234, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004577092453837394, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004577092453837394, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.28712775707244875, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.748046875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3657856583595276, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02871277555823326, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02871277555823326, "step": 75 }, { "calibration/aurc": 0.2756884567471795, "calibration/batch_distribution_entropy": 0.981849015365291, "calibration/batch_entropy_100bins": 0.963794621717061, "calibration/batch_entropy_10bins": 0.981849015365291, "calibration/batch_entropy_50bins": 0.9772544795865941, "calibration/batch_uniqueness": 0.9583403270974067, "calibration/buffer_distribution_entropy": 0.9748055844875566, "calibration/buffer_entropy_100bins": 0.9354970264003273, "calibration/buffer_entropy_10bins": 0.9748055844875566, "calibration/buffer_entropy_50bins": 0.956262724283142, "calibration/confidence_entropy": 0.47419515340800844, "calibration/coverage@0%": 0.019922639432485324, "calibration/coverage@1%": 0.019922639432485324, "calibration/coverage@10%": 0.1671882644324853, "calibration/coverage@15%": 0.29453660102739726, "calibration/coverage@20%": 0.3945366010273973, "calibration/coverage@25%": 0.46133347602739727, "calibration/coverage@30%": 0.6404713490704501, "calibration/coverage@5%": 0.08242263943248532, "calibration/ece": 0.1516829189459081, "calibration/mean_confidence": 0.4916690845191692, "calibration/prompt_uniqueness": 0.8781654445239333, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 878.2, "completions/max_terminated_length": 482.8, "completions/mean_length": 163.8306640625, "completions/mean_terminated_length": 163.4287567138672, "completions/min_length": 66.0, "completions/min_terminated_length": 66.0, "epoch": 0.256, "grad_norm": 0.001148949726484716, "learning_rate": 1e-06, "loss": 0.001, "num_tokens": 265697698.0, "reward": 0.9235002279281617, "reward_std": 0.1053592398762703, "rewards/accuracy_reward": 0.5205078125, "rewards/brier_reward": 0.7646861553192139, "rewards/confidence_uniqueness_reward": 0.9591035127639771, "rewards/format_reward": 0.999609375, "rewards/frontier_aurc_reward": -0.0028343759011477234, "rewards/frontier_coverage_0": 0.09306152537465096, "rewards/frontier_coverage_1": 0.09306152537465096, "rewards/frontier_coverage_10": 0.09306152537465096, "rewards/frontier_coverage_15": 0.09306152537465096, "rewards/frontier_coverage_20": 0.09306152537465096, "rewards/frontier_coverage_25": 0.09306152537465096, "rewards/frontier_coverage_5": 0.09306152537465096, "rewards/frontier_ece_reward": 0.018909335136413574, "rewards/frontier_entropy_batch_reward": -0.18935712277889252, "signal/accuracy_reward/centered_abs_mean": 0.12547607421875, "signal/accuracy_reward/group_bin_occupancy": 0.180078125, "signal/accuracy_reward/group_std_mean": 0.16060097515583038, "signal/accuracy_reward/group_zero_std_frac": 0.559375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.062738037109375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.062738037109375, "signal/advantage_abs_mean": 0.08297341018915176, "signal/advantage_pre_scale_abs_mean": 0.08297341018915176, "signal/advantage_pre_scale_std": 0.12462374716997146, "signal/advantage_std": 0.12462374716997146, "signal/brier_reward/centered_abs_mean": 0.1717162013053894, "signal/brier_reward/group_bin_occupancy": 0.865625, "signal/brier_reward/group_std_mean": 0.21583383977413179, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017171620205044747, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.017171620205044747, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012881658598780632, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.929296875, "signal/confidence_uniqueness_reward/group_std_mean": 0.017492034845054148, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012881658738479018, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012881658738479018, "signal/format_reward/centered_abs_mean": 0.0007568359375, "signal/format_reward/group_bin_occupancy": 0.1265625, "signal/format_reward/group_std_mean": 0.0022097086533904076, "signal/format_reward/group_zero_std_frac": 0.9875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00037841796875, "signal/frontier_aurc_reward/centered_abs_mean": 0.0022053365129977463, "signal/frontier_aurc_reward/group_bin_occupancy": 0.780078125, "signal/frontier_aurc_reward/group_std_mean": 0.00320956208743155, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.7566706557990982e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.7566706557990982e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.21591795980930328, "signal/frontier_coverage_0/group_bin_occupancy": 0.890234375, "signal/frontier_coverage_0/group_std_mean": 0.2742127299308777, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0026989746373146774, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0026989746373146774, "signal/frontier_coverage_1/centered_abs_mean": 0.21591795980930328, "signal/frontier_coverage_1/group_bin_occupancy": 0.890234375, "signal/frontier_coverage_1/group_std_mean": 0.2742127299308777, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026989746373146774, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026989746373146774, "signal/frontier_coverage_10/centered_abs_mean": 0.21591795980930328, "signal/frontier_coverage_10/group_bin_occupancy": 0.890234375, "signal/frontier_coverage_10/group_std_mean": 0.2742127299308777, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026989746373146774, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026989746373146774, "signal/frontier_coverage_15/centered_abs_mean": 0.21591795980930328, "signal/frontier_coverage_15/group_bin_occupancy": 0.890234375, "signal/frontier_coverage_15/group_std_mean": 0.2742127299308777, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0026989746373146774, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026989746373146774, "signal/frontier_coverage_20/centered_abs_mean": 0.21591795980930328, "signal/frontier_coverage_20/group_bin_occupancy": 0.890234375, "signal/frontier_coverage_20/group_std_mean": 0.2742127299308777, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0026989746373146774, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0026989746373146774, "signal/frontier_coverage_25/centered_abs_mean": 0.21591795980930328, "signal/frontier_coverage_25/group_bin_occupancy": 0.890234375, "signal/frontier_coverage_25/group_std_mean": 0.2742127299308777, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0026989746373146774, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0026989746373146774, "signal/frontier_coverage_5/centered_abs_mean": 0.21591795980930328, "signal/frontier_coverage_5/group_bin_occupancy": 0.890234375, "signal/frontier_coverage_5/group_std_mean": 0.2742127299308777, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026989746373146774, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026989746373146774, "signal/frontier_ece_reward/centered_abs_mean": 0.04098983183503151, "signal/frontier_ece_reward/group_bin_occupancy": 0.82890625, "signal/frontier_ece_reward/group_std_mean": 0.05865926668047905, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004098983202129603, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004098983202129603, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27483277320861815, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.743359375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.35315130949020385, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.027483277022838593, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027483277022838593, "step": 80 }, { "calibration/aurc": 0.3897235445216074, "calibration/batch_distribution_entropy": 0.9871985803996584, "calibration/batch_entropy_100bins": 0.9650498108188612, "calibration/batch_entropy_10bins": 0.9871985803996584, "calibration/batch_entropy_50bins": 0.9774071673496001, "calibration/batch_uniqueness": 0.9601234608508132, "calibration/buffer_distribution_entropy": 0.9776273835298188, "calibration/buffer_entropy_100bins": 0.9421174103566827, "calibration/buffer_entropy_10bins": 0.9776273835298188, "calibration/buffer_entropy_50bins": 0.9610857397243047, "calibration/confidence_entropy": 0.4835290106060966, "calibration/coverage@0%": 0.0035194471624266145, "calibration/coverage@1%": 0.0035194471624266145, "calibration/coverage@10%": 0.05117569716242661, "calibration/coverage@15%": 0.1171913221624266, "calibration/coverage@20%": 0.15980461105675148, "calibration/coverage@25%": 0.2211900684931507, "calibration/coverage@30%": 0.3173709637964775, "calibration/coverage@5%": 0.0035194471624266145, "calibration/ece": 0.14380001971983383, "calibration/mean_confidence": 0.5027473010005465, "calibration/prompt_uniqueness": 0.880182234082336, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000390625, "completions/max_length": 1118.0, "completions/max_terminated_length": 649.4, "completions/mean_length": 167.30830078125, "completions/mean_terminated_length": 166.77369995117186, "completions/min_length": 64.8, "completions/min_terminated_length": 64.8, "epoch": 0.272, "grad_norm": 0.0011026699794456363, "learning_rate": 1e-06, "loss": 0.0014, "num_tokens": 282376631.0, "reward": 0.9143985748291016, "reward_std": 0.10913633704185485, "rewards/accuracy_reward": 0.503125, "rewards/brier_reward": 0.7558487296104431, "rewards/confidence_uniqueness_reward": 0.958933699131012, "rewards/format_reward": 0.9994140625, "rewards/frontier_aurc_reward": -0.003067450597882271, "rewards/frontier_coverage_0": 0.09763064086437226, "rewards/frontier_coverage_1": 0.09763064086437226, "rewards/frontier_coverage_10": 0.09763064086437226, "rewards/frontier_coverage_15": 0.09763064086437226, "rewards/frontier_coverage_20": 0.09763064086437226, "rewards/frontier_coverage_25": 0.09763064086437226, "rewards/frontier_coverage_5": 0.09763064086437226, "rewards/frontier_ece_reward": 0.016011307016015053, "rewards/frontier_entropy_batch_reward": -0.18454676866531372, "signal/accuracy_reward/centered_abs_mean": 0.1283447265625, "signal/accuracy_reward/group_bin_occupancy": 0.18359375, "signal/accuracy_reward/group_std_mean": 0.1672771155834198, "signal/accuracy_reward/group_zero_std_frac": 0.53125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06417236328125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06417236328125, "signal/advantage_abs_mean": 0.08471592962741852, "signal/advantage_pre_scale_abs_mean": 0.08471592962741852, "signal/advantage_pre_scale_std": 0.12727494090795516, "signal/advantage_std": 0.12727494090795516, "signal/brier_reward/centered_abs_mean": 0.1756508618593216, "signal/brier_reward/group_bin_occupancy": 0.872265625, "signal/brier_reward/group_std_mean": 0.2209733545780182, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017565086483955383, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.017565086483955383, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.013165917806327343, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.93125, "signal/confidence_uniqueness_reward/group_std_mean": 0.018373236805200577, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013165918411687017, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013165918411687017, "signal/format_reward/centered_abs_mean": 0.00113525390625, "signal/format_reward/group_bin_occupancy": 0.12734375, "signal/format_reward/group_std_mean": 0.0033145629335194827, "signal/format_reward/group_zero_std_frac": 0.98125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000567626953125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000567626953125, "signal/frontier_aurc_reward/centered_abs_mean": 0.002310941834002733, "signal/frontier_aurc_reward/group_bin_occupancy": 0.778125, "signal/frontier_aurc_reward/group_std_mean": 0.0034583484288305045, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.8886771542602218e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.8886771542602218e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.21623624563217164, "signal/frontier_coverage_0/group_bin_occupancy": 0.8859375, "signal/frontier_coverage_0/group_std_mean": 0.2773744761943817, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002702953014522791, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002702953014522791, "signal/frontier_coverage_1/centered_abs_mean": 0.21623624563217164, "signal/frontier_coverage_1/group_bin_occupancy": 0.8859375, "signal/frontier_coverage_1/group_std_mean": 0.2773744761943817, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002702953014522791, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002702953014522791, "signal/frontier_coverage_10/centered_abs_mean": 0.21623624563217164, "signal/frontier_coverage_10/group_bin_occupancy": 0.8859375, "signal/frontier_coverage_10/group_std_mean": 0.2773744761943817, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002702953014522791, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002702953014522791, "signal/frontier_coverage_15/centered_abs_mean": 0.21623624563217164, "signal/frontier_coverage_15/group_bin_occupancy": 0.8859375, "signal/frontier_coverage_15/group_std_mean": 0.2773744761943817, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002702953014522791, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002702953014522791, "signal/frontier_coverage_20/centered_abs_mean": 0.21623624563217164, "signal/frontier_coverage_20/group_bin_occupancy": 0.8859375, "signal/frontier_coverage_20/group_std_mean": 0.2773744761943817, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002702953014522791, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002702953014522791, "signal/frontier_coverage_25/centered_abs_mean": 0.21623624563217164, "signal/frontier_coverage_25/group_bin_occupancy": 0.8859375, "signal/frontier_coverage_25/group_std_mean": 0.2773744761943817, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002702953014522791, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002702953014522791, "signal/frontier_coverage_5/centered_abs_mean": 0.21623624563217164, "signal/frontier_coverage_5/group_bin_occupancy": 0.8859375, "signal/frontier_coverage_5/group_std_mean": 0.2773744761943817, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002702953014522791, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002702953014522791, "signal/frontier_ece_reward/centered_abs_mean": 0.04007608145475387, "signal/frontier_ece_reward/group_bin_occupancy": 0.824609375, "signal/frontier_ece_reward/group_std_mean": 0.05737483724951744, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004007608164101839, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004007608164101839, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26959097683429717, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.748828125, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3461661696434021, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026959098130464553, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026959098130464553, "step": 85 }, { "calibration/aurc": 0.33929897089230937, "calibration/batch_distribution_entropy": 0.989794172784116, "calibration/batch_entropy_100bins": 0.9616161331101599, "calibration/batch_entropy_10bins": 0.989794172784116, "calibration/batch_entropy_50bins": 0.9782942991565935, "calibration/batch_uniqueness": 0.9621734619140625, "calibration/buffer_distribution_entropy": 0.9798353797281024, "calibration/buffer_entropy_100bins": 0.9472962073477508, "calibration/buffer_entropy_10bins": 0.9798353797281024, "calibration/buffer_entropy_50bins": 0.9648110714996019, "calibration/confidence_entropy": 0.5029849704441383, "calibration/coverage@0%": 0.016796875, "calibration/coverage@1%": 0.016796875, "calibration/coverage@10%": 0.06796875, "calibration/coverage@15%": 0.10859375, "calibration/coverage@20%": 0.158203125, "calibration/coverage@25%": 0.209375, "calibration/coverage@30%": 0.330078125, "calibration/coverage@5%": 0.026171875, "calibration/ece": 0.13708101797225564, "calibration/mean_confidence": 0.5260060638175437, "calibration/prompt_uniqueness": 0.8890625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 990.4, "completions/max_terminated_length": 636.2, "completions/mean_length": 157.38349609375, "completions/mean_terminated_length": 157.11478271484376, "completions/min_length": 65.2, "completions/min_terminated_length": 65.2, "epoch": 0.288, "grad_norm": 0.0010967063717544079, "learning_rate": 1e-06, "loss": 0.0013, "num_tokens": 298946414.0, "reward": 0.9226956605911255, "reward_std": 0.10692842602729798, "rewards/accuracy_reward": 0.5185546875, "rewards/brier_reward": 0.759922206401825, "rewards/confidence_uniqueness_reward": 0.9619071364402771, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.0029738360550254582, "rewards/frontier_coverage_0": 0.08782123178243637, "rewards/frontier_coverage_1": 0.08782123178243637, "rewards/frontier_coverage_10": 0.08782123178243637, "rewards/frontier_coverage_15": 0.08782123178243637, "rewards/frontier_coverage_20": 0.08782123178243637, "rewards/frontier_coverage_25": 0.08782123178243637, "rewards/frontier_coverage_5": 0.08782123178243637, "rewards/frontier_ece_reward": 0.015628389501944184, "rewards/frontier_entropy_batch_reward": -0.17876963317394257, "signal/accuracy_reward/centered_abs_mean": 0.12471923828125, "signal/accuracy_reward/group_bin_occupancy": 0.18671875, "signal/accuracy_reward/group_std_mean": 0.16823607087135314, "signal/accuracy_reward/group_zero_std_frac": 0.50625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.062359619140625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.062359619140625, "signal/advantage_abs_mean": 0.08173956871032714, "signal/advantage_pre_scale_abs_mean": 0.08173956871032714, "signal/advantage_pre_scale_std": 0.12362392991781235, "signal/advantage_std": 0.12362392991781235, "signal/brier_reward/centered_abs_mean": 0.16985757648944855, "signal/brier_reward/group_bin_occupancy": 0.8671875, "signal/brier_reward/group_std_mean": 0.21388141214847564, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016985757648944853, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.016985757648944853, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.011985784396529198, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.937890625, "signal/confidence_uniqueness_reward/group_std_mean": 0.015646530874073507, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0011985784396529198, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0011985784396529198, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_bin_occupancy": 0.12578125, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0022517605219036342, "signal/frontier_aurc_reward/group_bin_occupancy": 0.753125, "signal/frontier_aurc_reward/group_std_mean": 0.00341446828097105, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.8147007833467795e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.8147007833467795e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.20966576039791107, "signal/frontier_coverage_0/group_bin_occupancy": 0.886328125, "signal/frontier_coverage_0/group_std_mean": 0.2668231546878815, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002620822051540017, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002620822051540017, "signal/frontier_coverage_1/centered_abs_mean": 0.20966576039791107, "signal/frontier_coverage_1/group_bin_occupancy": 0.886328125, "signal/frontier_coverage_1/group_std_mean": 0.2668231546878815, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002620822051540017, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002620822051540017, "signal/frontier_coverage_10/centered_abs_mean": 0.20966576039791107, "signal/frontier_coverage_10/group_bin_occupancy": 0.886328125, "signal/frontier_coverage_10/group_std_mean": 0.2668231546878815, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002620822051540017, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002620822051540017, "signal/frontier_coverage_15/centered_abs_mean": 0.20966576039791107, "signal/frontier_coverage_15/group_bin_occupancy": 0.886328125, "signal/frontier_coverage_15/group_std_mean": 0.2668231546878815, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002620822051540017, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002620822051540017, "signal/frontier_coverage_20/centered_abs_mean": 0.20966576039791107, "signal/frontier_coverage_20/group_bin_occupancy": 0.886328125, "signal/frontier_coverage_20/group_std_mean": 0.2668231546878815, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002620822051540017, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002620822051540017, "signal/frontier_coverage_25/centered_abs_mean": 0.20966576039791107, "signal/frontier_coverage_25/group_bin_occupancy": 0.886328125, "signal/frontier_coverage_25/group_std_mean": 0.2668231546878815, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002620822051540017, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002620822051540017, "signal/frontier_coverage_5/centered_abs_mean": 0.20966576039791107, "signal/frontier_coverage_5/group_bin_occupancy": 0.886328125, "signal/frontier_coverage_5/group_std_mean": 0.2668231546878815, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002620822051540017, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002620822051540017, "signal/frontier_ece_reward/centered_abs_mean": 0.03799701854586601, "signal/frontier_ece_reward/group_bin_occupancy": 0.808203125, "signal/frontier_ece_reward/group_std_mean": 0.05444479286670685, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.003799702040851116, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.003799702040851116, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26266041994094846, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.75234375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3405035316944122, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02626604326069355, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02626604326069355, "step": 90 }, { "calibration/aurc": 0.31348549126500747, "calibration/batch_distribution_entropy": 0.984274980481597, "calibration/batch_entropy_100bins": 0.9631625065111967, "calibration/batch_entropy_10bins": 0.984274980481597, "calibration/batch_entropy_50bins": 0.9770299989819294, "calibration/batch_uniqueness": 0.9626984046368829, "calibration/buffer_distribution_entropy": 0.9815639661047172, "calibration/buffer_entropy_100bins": 0.9515858500656005, "calibration/buffer_entropy_10bins": 0.9815639661047172, "calibration/buffer_entropy_50bins": 0.967851841375308, "calibration/confidence_entropy": 0.5032728046019687, "calibration/coverage@0%": 0.00546875, "calibration/coverage@1%": 0.00546875, "calibration/coverage@10%": 0.014453125, "calibration/coverage@15%": 0.11645899584148726, "calibration/coverage@20%": 0.2305727128180039, "calibration/coverage@25%": 0.3513194104696673, "calibration/coverage@30%": 0.4826749021526419, "calibration/coverage@5%": 0.009765625, "calibration/ece": 0.12730379469726888, "calibration/mean_confidence": 0.5350013606511557, "calibration/prompt_uniqueness": 0.8859659534339229, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 721.4, "completions/max_terminated_length": 502.4, "completions/mean_length": 158.0119140625, "completions/mean_terminated_length": 157.87767639160157, "completions/min_length": 63.0, "completions/min_terminated_length": 63.0, "epoch": 0.304, "grad_norm": 0.0013374168192967772, "learning_rate": 1e-06, "loss": 0.0006, "num_tokens": 315494408.0, "reward": 0.920866048336029, "reward_std": 0.10256336778402328, "rewards/accuracy_reward": 0.51435546875, "rewards/brier_reward": 0.7553410172462464, "rewards/confidence_uniqueness_reward": 0.9617467761039734, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.003019801015034318, "rewards/frontier_coverage_0": 0.08569234870374202, "rewards/frontier_coverage_1": 0.08569234870374202, "rewards/frontier_coverage_10": 0.08569234870374202, "rewards/frontier_coverage_15": 0.08569234870374202, "rewards/frontier_coverage_20": 0.08569234870374202, "rewards/frontier_coverage_25": 0.08569234870374202, "rewards/frontier_coverage_5": 0.08569234870374202, "rewards/frontier_ece_reward": 0.014303101412951946, "rewards/frontier_entropy_batch_reward": -0.16813477575778962, "signal/accuracy_reward/centered_abs_mean": 0.118218994140625, "signal/accuracy_reward/group_bin_occupancy": 0.181640625, "signal/accuracy_reward/group_std_mean": 0.15723580718040467, "signal/accuracy_reward/group_zero_std_frac": 0.546875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0591094970703125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0591094970703125, "signal/advantage_abs_mean": 0.07947536259889602, "signal/advantage_pre_scale_abs_mean": 0.07947536259889602, "signal/advantage_pre_scale_std": 0.1194717451930046, "signal/advantage_std": 0.1194717451930046, "signal/brier_reward/centered_abs_mean": 0.16404346823692323, "signal/brier_reward/group_bin_occupancy": 0.878515625, "signal/brier_reward/group_std_mean": 0.20719643235206603, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016404346562922, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.016404346562922, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012253463082015515, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.93359375, "signal/confidence_uniqueness_reward/group_std_mean": 0.01594906710088253, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001225346396677196, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001225346396677196, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_bin_occupancy": 0.12578125, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0021592382341623305, "signal/frontier_aurc_reward/group_bin_occupancy": 0.76875, "signal/frontier_aurc_reward/group_std_mean": 0.0032209414057433605, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.699047727219295e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.699047727219295e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.2054966926574707, "signal/frontier_coverage_0/group_bin_occupancy": 0.8875, "signal/frontier_coverage_0/group_std_mean": 0.2626974046230316, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0025687087327241898, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0025687087327241898, "signal/frontier_coverage_1/centered_abs_mean": 0.2054966926574707, "signal/frontier_coverage_1/group_bin_occupancy": 0.8875, "signal/frontier_coverage_1/group_std_mean": 0.2626974046230316, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0025687087327241898, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025687087327241898, "signal/frontier_coverage_10/centered_abs_mean": 0.2054966926574707, "signal/frontier_coverage_10/group_bin_occupancy": 0.8875, "signal/frontier_coverage_10/group_std_mean": 0.2626974046230316, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0025687087327241898, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025687087327241898, "signal/frontier_coverage_15/centered_abs_mean": 0.2054966926574707, "signal/frontier_coverage_15/group_bin_occupancy": 0.8875, "signal/frontier_coverage_15/group_std_mean": 0.2626974046230316, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0025687087327241898, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0025687087327241898, "signal/frontier_coverage_20/centered_abs_mean": 0.2054966926574707, "signal/frontier_coverage_20/group_bin_occupancy": 0.8875, "signal/frontier_coverage_20/group_std_mean": 0.2626974046230316, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0025687087327241898, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0025687087327241898, "signal/frontier_coverage_25/centered_abs_mean": 0.2054966926574707, "signal/frontier_coverage_25/group_bin_occupancy": 0.8875, "signal/frontier_coverage_25/group_std_mean": 0.2626974046230316, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0025687087327241898, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0025687087327241898, "signal/frontier_coverage_5/centered_abs_mean": 0.2054966926574707, "signal/frontier_coverage_5/group_bin_occupancy": 0.8875, "signal/frontier_coverage_5/group_std_mean": 0.2626974046230316, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025687087327241898, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025687087327241898, "signal/frontier_ece_reward/centered_abs_mean": 0.03668390363454819, "signal/frontier_ece_reward/group_bin_occupancy": 0.805078125, "signal/frontier_ece_reward/group_std_mean": 0.05322126373648643, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.003668390540406108, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.003668390540406108, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.25058538317680357, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.743359375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3319805324077606, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.025058538839221, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.025058538839221, "step": 95 }, { "calibration/aurc": 0.2693574848107298, "calibration/batch_distribution_entropy": 0.9883345901493682, "calibration/batch_entropy_100bins": 0.964873857679726, "calibration/batch_entropy_10bins": 0.9883345901493682, "calibration/batch_entropy_50bins": 0.9762615204324228, "calibration/batch_uniqueness": 0.962933349609375, "calibration/buffer_distribution_entropy": 0.9829313733544339, "calibration/buffer_entropy_100bins": 0.9553256663761157, "calibration/buffer_entropy_10bins": 0.9829313733544339, "calibration/buffer_entropy_50bins": 0.9704623223411284, "calibration/confidence_entropy": 0.5033581533240031, "calibration/coverage@0%": 0.008984375, "calibration/coverage@1%": 0.008984375, "calibration/coverage@10%": 0.198046875, "calibration/coverage@15%": 0.321875, "calibration/coverage@20%": 0.42265625, "calibration/coverage@25%": 0.519140625, "calibration/coverage@30%": 0.587890625, "calibration/coverage@5%": 0.051171875, "calibration/ece": 0.1460491076202734, "calibration/mean_confidence": 0.536501651706636, "calibration/prompt_uniqueness": 0.88388671875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 505.2, "completions/max_terminated_length": 505.2, "completions/mean_length": 159.97177734375, "completions/mean_terminated_length": 159.97177734375, "completions/min_length": 73.0, "completions/min_terminated_length": 73.0, "epoch": 0.32, "grad_norm": 0.001107752905227244, "learning_rate": 1e-06, "loss": 0.0001, "num_tokens": 332221223.0, "reward": 0.9314111828804016, "reward_std": 0.09003743529319763, "rewards/accuracy_reward": 0.53388671875, "rewards/brier_reward": 0.7707650423049927, "rewards/confidence_uniqueness_reward": 0.9618469119071961, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.002678099344484508, "rewards/frontier_coverage_0": 0.08919677138328552, "rewards/frontier_coverage_1": 0.08919677138328552, "rewards/frontier_coverage_10": 0.08919677138328552, "rewards/frontier_coverage_15": 0.08919677138328552, "rewards/frontier_coverage_20": 0.08919677138328552, "rewards/frontier_coverage_25": 0.08919677138328552, "rewards/frontier_coverage_5": 0.08919677138328552, "rewards/frontier_ece_reward": 0.017109639570116998, "rewards/frontier_entropy_batch_reward": -0.18177941143512727, "signal/accuracy_reward/centered_abs_mean": 0.088275146484375, "signal/accuracy_reward/group_bin_occupancy": 0.17265625, "signal/accuracy_reward/group_std_mean": 0.12341197431087494, "signal/accuracy_reward/group_zero_std_frac": 0.61875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0441375732421875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0441375732421875, "signal/advantage_abs_mean": 0.06831415593624116, "signal/advantage_pre_scale_abs_mean": 0.06831415593624116, "signal/advantage_pre_scale_std": 0.10720582604408264, "signal/advantage_std": 0.10720582604408264, "signal/brier_reward/centered_abs_mean": 0.1551128536462784, "signal/brier_reward/group_bin_occupancy": 0.8765625, "signal/brier_reward/group_std_mean": 0.19683083295822143, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01551128625869751, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01551128625869751, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.011729908920824528, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.930859375, "signal/confidence_uniqueness_reward/group_std_mean": 0.015355130471289159, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0011729909107089042, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0011729909107089042, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_bin_occupancy": 0.12578125, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.002058024751022458, "signal/frontier_aurc_reward/group_bin_occupancy": 0.765234375, "signal/frontier_aurc_reward/group_std_mean": 0.0031040641479194164, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.5725309751578606e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.5725309751578606e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.18719760775566102, "signal/frontier_coverage_0/group_bin_occupancy": 0.887890625, "signal/frontier_coverage_0/group_std_mean": 0.2401178687810898, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0023399701341986654, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023399701341986654, "signal/frontier_coverage_1/centered_abs_mean": 0.18719760775566102, "signal/frontier_coverage_1/group_bin_occupancy": 0.887890625, "signal/frontier_coverage_1/group_std_mean": 0.2401178687810898, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023399701341986654, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023399701341986654, "signal/frontier_coverage_10/centered_abs_mean": 0.18719760775566102, "signal/frontier_coverage_10/group_bin_occupancy": 0.887890625, "signal/frontier_coverage_10/group_std_mean": 0.2401178687810898, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023399701341986654, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023399701341986654, "signal/frontier_coverage_15/centered_abs_mean": 0.18719760775566102, "signal/frontier_coverage_15/group_bin_occupancy": 0.887890625, "signal/frontier_coverage_15/group_std_mean": 0.2401178687810898, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023399701341986654, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023399701341986654, "signal/frontier_coverage_20/centered_abs_mean": 0.18719760775566102, "signal/frontier_coverage_20/group_bin_occupancy": 0.887890625, "signal/frontier_coverage_20/group_std_mean": 0.2401178687810898, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0023399701341986654, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0023399701341986654, "signal/frontier_coverage_25/centered_abs_mean": 0.18719760775566102, "signal/frontier_coverage_25/group_bin_occupancy": 0.887890625, "signal/frontier_coverage_25/group_std_mean": 0.2401178687810898, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0023399701341986654, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0023399701341986654, "signal/frontier_coverage_5/centered_abs_mean": 0.18719760775566102, "signal/frontier_coverage_5/group_bin_occupancy": 0.887890625, "signal/frontier_coverage_5/group_std_mean": 0.2401178687810898, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023399701341986654, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023399701341986654, "signal/frontier_ece_reward/centered_abs_mean": 0.036752212792634964, "signal/frontier_ece_reward/group_bin_occupancy": 0.809375, "signal/frontier_ece_reward/group_std_mean": 0.05313318446278572, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.003675221325829625, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.003675221325829625, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2627487242221832, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.753125, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3402763903141022, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026274873316287993, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026274873316287993, "step": 100 }, { "epoch": 0.32, "eval_calibration/aurc": 0.5221447170861099, "eval_calibration/batch_distribution_entropy": 0.9459823530950395, "eval_calibration/batch_entropy_100bins": 0.7099314297963414, "eval_calibration/batch_entropy_10bins": 0.9459823530950395, "eval_calibration/batch_entropy_50bins": 0.80145287432243, "eval_calibration/batch_uniqueness": 0.9072265625, "eval_calibration/buffer_distribution_entropy": 0.9836311818150062, "eval_calibration/buffer_entropy_100bins": 0.9572832371945064, "eval_calibration/buffer_entropy_10bins": 0.9836311818150062, "eval_calibration/buffer_entropy_50bins": 0.9718177911308372, "eval_calibration/confidence_entropy": 0.48188092462291277, "eval_calibration/coverage@0%": 0.0234375, "eval_calibration/coverage@1%": 0.0234375, "eval_calibration/coverage@10%": 0.0234375, "eval_calibration/coverage@15%": 0.0234375, "eval_calibration/coverage@20%": 0.0234375, "eval_calibration/coverage@25%": 0.0234375, "eval_calibration/coverage@30%": 0.078125, "eval_calibration/coverage@5%": 0.0234375, "eval_calibration/ece": 0.21960188052234064, "eval_calibration/mean_confidence": 0.4858527777314262, "eval_calibration/prompt_uniqueness": 0.9072265625, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 343.5, "eval_completions/max_terminated_length": 343.5, "eval_completions/mean_length": 161.39608001708984, "eval_completions/mean_terminated_length": 161.39608001708984, "eval_completions/min_length": 88.0, "eval_completions/min_terminated_length": 88.0, "eval_loss": 0.0, "eval_num_tokens": 332221223.0, "eval_reward": 0.7855877131223679, "eval_reward_std": 0.2253391109406948, "eval_rewards/accuracy_reward": 0.40234375, "eval_rewards/brier_reward": 0.767953634262085, "eval_rewards/confidence_uniqueness_reward": 0.904541015625, "eval_rewards/format_reward": 1.0, "eval_rewards/frontier_aurc_reward": -0.003360171685926616, "eval_rewards/frontier_coverage_0": 0.17842230759561062, "eval_rewards/frontier_coverage_1": 0.17842230759561062, "eval_rewards/frontier_coverage_10": 0.17842230759561062, "eval_rewards/frontier_coverage_15": 0.17842230759561062, "eval_rewards/frontier_coverage_20": 0.17842230759561062, "eval_rewards/frontier_coverage_25": 0.17842230759561062, "eval_rewards/frontier_coverage_5": 0.17842230759561062, "eval_rewards/frontier_ece_reward": 0.015964159043505788, "eval_rewards/frontier_entropy_batch_reward": -1.0, "eval_runtime": 18.266, "eval_samples_per_second": 27.373, "eval_signal/accuracy_reward/centered_abs_mean": 0.46875, "eval_signal/accuracy_reward/group_bin_occupancy": 0.25, "eval_signal/accuracy_reward/group_std_mean": 0.491495244204998, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.234375, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.234375, "eval_signal/advantage_abs_mean": 0.20828185975551605, "eval_signal/advantage_pre_scale_abs_mean": 0.20828185975551605, "eval_signal/advantage_pre_scale_std": 0.2229425571858883, "eval_signal/advantage_std": 0.2229425571858883, "eval_signal/brier_reward/centered_abs_mean": 0.21178840100765228, "eval_signal/brier_reward/group_bin_occupancy": 0.8984375, "eval_signal/brier_reward/group_std_mean": 0.26099943369627, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021178840193897486, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.021178840193897486, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.037750244140625, "eval_signal/confidence_uniqueness_reward/group_bin_occupancy": 0.3671875, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.04435160104185343, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0037750244955532253, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0037750244955532253, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_bin_occupancy": 0.125, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.00327087048208341, "eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.8046875, "eval_signal/frontier_aurc_reward/group_std_mean": 0.0050255340756848454, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.088588320882991e-05, "eval_signal/frontier_aurc_reward/weight": 0.012500000186264515, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.088588320882991e-05, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.36353210359811783, "eval_signal/frontier_coverage_0/group_bin_occupancy": 0.953125, "eval_signal/frontier_coverage_0/group_std_mean": 0.4482342004776001, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.004544151364825666, "eval_signal/frontier_coverage_0/weight": 0.012500000186264515, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004544151364825666, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.36353210359811783, "eval_signal/frontier_coverage_1/group_bin_occupancy": 0.953125, "eval_signal/frontier_coverage_1/group_std_mean": 0.4482342004776001, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004544151364825666, "eval_signal/frontier_coverage_1/weight": 0.012500000186264515, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004544151364825666, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.36353210359811783, "eval_signal/frontier_coverage_10/group_bin_occupancy": 0.953125, "eval_signal/frontier_coverage_10/group_std_mean": 0.4482342004776001, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004544151364825666, "eval_signal/frontier_coverage_10/weight": 0.012500000186264515, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004544151364825666, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.36353210359811783, "eval_signal/frontier_coverage_15/group_bin_occupancy": 0.953125, "eval_signal/frontier_coverage_15/group_std_mean": 0.4482342004776001, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004544151364825666, "eval_signal/frontier_coverage_15/weight": 0.012500000186264515, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004544151364825666, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.36353210359811783, "eval_signal/frontier_coverage_20/group_bin_occupancy": 0.953125, "eval_signal/frontier_coverage_20/group_std_mean": 0.4482342004776001, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004544151364825666, "eval_signal/frontier_coverage_20/weight": 0.012500000186264515, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004544151364825666, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.36353210359811783, "eval_signal/frontier_coverage_25/group_bin_occupancy": 0.953125, "eval_signal/frontier_coverage_25/group_std_mean": 0.4482342004776001, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004544151364825666, "eval_signal/frontier_coverage_25/weight": 0.012500000186264515, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004544151364825666, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.36353210359811783, "eval_signal/frontier_coverage_5/group_bin_occupancy": 0.953125, "eval_signal/frontier_coverage_5/group_std_mean": 0.4482342004776001, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004544151364825666, "eval_signal/frontier_coverage_5/weight": 0.012500000186264515, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004544151364825666, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.037730203941464424, "eval_signal/frontier_ece_reward/group_bin_occupancy": 0.765625, "eval_signal/frontier_ece_reward/group_std_mean": 0.061389719136059284, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0037730205804109573, "eval_signal/frontier_ece_reward/weight": 0.10000000149011612, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0037730205804109573, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.125, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0, "eval_steps_per_second": 0.219, "step": 100 }, { "calibration/aurc": 0.30565225146740005, "calibration/batch_distribution_entropy": 0.9817950038846648, "calibration/batch_entropy_100bins": 0.9574620453276896, "calibration/batch_entropy_10bins": 0.9817950038846648, "calibration/batch_entropy_50bins": 0.9759610198636606, "calibration/batch_uniqueness": 0.9610809326171875, "calibration/buffer_distribution_entropy": 0.9858778348612034, "calibration/buffer_entropy_100bins": 0.9610770923601644, "calibration/buffer_entropy_10bins": 0.9858778348612034, "calibration/buffer_entropy_50bins": 0.9746952731382882, "calibration/confidence_entropy": 0.4912567329274273, "calibration/coverage@0%": 0.001953125, "calibration/coverage@1%": 0.001953125, "calibration/coverage@10%": 0.001953125, "calibration/coverage@15%": 0.06328125, "calibration/coverage@20%": 0.15, "calibration/coverage@25%": 0.326171875, "calibration/coverage@30%": 0.60625, "calibration/coverage@5%": 0.001953125, "calibration/ece": 0.13340430237380113, "calibration/mean_confidence": 0.4750956146068159, "calibration/prompt_uniqueness": 0.87880859375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 502.0, "completions/max_terminated_length": 502.0, "completions/mean_length": 161.48173828125, "completions/mean_terminated_length": 161.48173828125, "completions/min_length": 64.4, "completions/min_terminated_length": 64.4, "epoch": 0.336, "grad_norm": 0.0009220660431310534, "learning_rate": 1e-06, "loss": 0.0007, "num_tokens": 348597228.0, "reward": 0.9312142729759216, "reward_std": 0.09383742660284042, "rewards/accuracy_reward": 0.5375, "rewards/brier_reward": 0.7658124089241027, "rewards/confidence_uniqueness_reward": 0.9620379209518433, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.002564789913594723, "rewards/frontier_coverage_0": 0.086562230437994, "rewards/frontier_coverage_1": 0.086562230437994, "rewards/frontier_coverage_10": 0.086562230437994, "rewards/frontier_coverage_15": 0.086562230437994, "rewards/frontier_coverage_20": 0.086562230437994, "rewards/frontier_coverage_25": 0.086562230437994, "rewards/frontier_coverage_5": 0.086562230437994, "rewards/frontier_ece_reward": 0.016775081306695937, "rewards/frontier_entropy_batch_reward": -0.19491543173789977, "signal/accuracy_reward/centered_abs_mean": 0.10306396484375, "signal/accuracy_reward/group_bin_occupancy": 0.176171875, "signal/accuracy_reward/group_std_mean": 0.13901238441467284, "signal/accuracy_reward/group_zero_std_frac": 0.590625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.051531982421875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.051531982421875, "signal/advantage_abs_mean": 0.07275230437517166, "signal/advantage_pre_scale_abs_mean": 0.07275230437517166, "signal/advantage_pre_scale_std": 0.11130416691303253, "signal/advantage_std": 0.11130416691303253, "signal/brier_reward/centered_abs_mean": 0.15814976394176483, "signal/brier_reward/group_bin_occupancy": 0.862890625, "signal/brier_reward/group_std_mean": 0.20069519579410552, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01581497713923454, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01581497713923454, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.013001594133675099, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.91015625, "signal/confidence_uniqueness_reward/group_std_mean": 0.01670000497251749, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013001594459638, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013001594459638, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_bin_occupancy": 0.125390625, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.0019870033720508218, "signal/frontier_aurc_reward/group_bin_occupancy": 0.7640625, "signal/frontier_aurc_reward/group_std_mean": 0.0030117711983621122, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.4837543605826795e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.4837543605826795e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.2016854852437973, "signal/frontier_coverage_0/group_bin_occupancy": 0.889453125, "signal/frontier_coverage_0/group_std_mean": 0.2578335404396057, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0025210686959326266, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0025210686959326266, "signal/frontier_coverage_1/centered_abs_mean": 0.2016854852437973, "signal/frontier_coverage_1/group_bin_occupancy": 0.889453125, "signal/frontier_coverage_1/group_std_mean": 0.2578335404396057, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0025210686959326266, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025210686959326266, "signal/frontier_coverage_10/centered_abs_mean": 0.2016854852437973, "signal/frontier_coverage_10/group_bin_occupancy": 0.889453125, "signal/frontier_coverage_10/group_std_mean": 0.2578335404396057, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0025210686959326266, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025210686959326266, "signal/frontier_coverage_15/centered_abs_mean": 0.2016854852437973, "signal/frontier_coverage_15/group_bin_occupancy": 0.889453125, "signal/frontier_coverage_15/group_std_mean": 0.2578335404396057, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0025210686959326266, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0025210686959326266, "signal/frontier_coverage_20/centered_abs_mean": 0.2016854852437973, "signal/frontier_coverage_20/group_bin_occupancy": 0.889453125, "signal/frontier_coverage_20/group_std_mean": 0.2578335404396057, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0025210686959326266, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0025210686959326266, "signal/frontier_coverage_25/centered_abs_mean": 0.2016854852437973, "signal/frontier_coverage_25/group_bin_occupancy": 0.889453125, "signal/frontier_coverage_25/group_std_mean": 0.2578335404396057, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0025210686959326266, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0025210686959326266, "signal/frontier_coverage_5/centered_abs_mean": 0.2016854852437973, "signal/frontier_coverage_5/group_bin_occupancy": 0.889453125, "signal/frontier_coverage_5/group_std_mean": 0.2578335404396057, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025210686959326266, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025210686959326266, "signal/frontier_ece_reward/centered_abs_mean": 0.03453442975878716, "signal/frontier_ece_reward/group_bin_occupancy": 0.797265625, "signal/frontier_ece_reward/group_std_mean": 0.05071103274822235, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.003453443106263876, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.003453443106263876, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2709068328142166, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.74609375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.34899981021881105, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.027090684697031974, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027090684697031974, "step": 105 }, { "calibration/aurc": 0.3341179136812372, "calibration/batch_distribution_entropy": 0.9588299480996344, "calibration/batch_entropy_100bins": 0.9429023940351428, "calibration/batch_entropy_10bins": 0.9588299480996344, "calibration/batch_entropy_50bins": 0.9597674673427136, "calibration/batch_uniqueness": 0.9558685302734375, "calibration/buffer_distribution_entropy": 0.9919930074210095, "calibration/buffer_entropy_100bins": 0.9728636582987387, "calibration/buffer_entropy_10bins": 0.9919930074210095, "calibration/buffer_entropy_50bins": 0.9833901381550991, "calibration/confidence_entropy": 0.4574315079144936, "calibration/coverage@0%": 0.01953125, "calibration/coverage@1%": 0.01953125, "calibration/coverage@10%": 0.1015625, "calibration/coverage@15%": 0.225390625, "calibration/coverage@20%": 0.333203125, "calibration/coverage@25%": 0.41015625, "calibration/coverage@30%": 0.48515625, "calibration/coverage@5%": 0.0265625, "calibration/ece": 0.13175204310416827, "calibration/mean_confidence": 0.44462494666585883, "calibration/prompt_uniqueness": 0.865966796875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 614.4, "completions/max_terminated_length": 614.4, "completions/mean_length": 162.67568359375, "completions/mean_terminated_length": 162.67568359375, "completions/min_length": 72.6, "completions/min_terminated_length": 72.6, "epoch": 0.352, "grad_norm": 0.0011161722941324115, "learning_rate": 1e-06, "loss": 0.0006, "num_tokens": 365523443.0, "reward": 0.9019862651824951, "reward_std": 0.09622626602649689, "rewards/accuracy_reward": 0.47529296875, "rewards/brier_reward": 0.7733848929405213, "rewards/confidence_uniqueness_reward": 0.9622901916503906, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.002903068531304598, "rewards/frontier_coverage_0": 0.13595542460680007, "rewards/frontier_coverage_1": 0.13595542460680007, "rewards/frontier_coverage_10": 0.13595542460680007, "rewards/frontier_coverage_15": 0.13595542460680007, "rewards/frontier_coverage_20": 0.13595542460680007, "rewards/frontier_coverage_25": 0.13595542460680007, "rewards/frontier_coverage_5": 0.13595542460680007, "rewards/frontier_ece_reward": 0.015835122019052506, "rewards/frontier_entropy_batch_reward": -0.22671036124229432, "signal/accuracy_reward/centered_abs_mean": 0.110052490234375, "signal/accuracy_reward/group_bin_occupancy": 0.175390625, "signal/accuracy_reward/group_std_mean": 0.14299680292606354, "signal/accuracy_reward/group_zero_std_frac": 0.596875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0550262451171875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0550262451171875, "signal/advantage_abs_mean": 0.07607890367507934, "signal/advantage_pre_scale_abs_mean": 0.07607890367507934, "signal/advantage_pre_scale_std": 0.11513545215129853, "signal/advantage_std": 0.11513545215129853, "signal/brier_reward/centered_abs_mean": 0.15754351615905762, "signal/brier_reward/group_bin_occupancy": 0.85390625, "signal/brier_reward/group_std_mean": 0.2009361833333969, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015754351764917372, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.015754351764917372, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.014543581008911132, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.884375, "signal/confidence_uniqueness_reward/group_std_mean": 0.018578647449612618, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0014543581288307904, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014543581288307904, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_bin_occupancy": 0.125, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.002373543428257108, "signal/frontier_aurc_reward/group_bin_occupancy": 0.753515625, "signal/frontier_aurc_reward/group_std_mean": 0.0036731195170432327, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.966929350805003e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.966929350805003e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.20907978415489198, "signal/frontier_coverage_0/group_bin_occupancy": 0.88671875, "signal/frontier_coverage_0/group_std_mean": 0.2643455803394318, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0026134973857551815, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0026134973857551815, "signal/frontier_coverage_1/centered_abs_mean": 0.20907978415489198, "signal/frontier_coverage_1/group_bin_occupancy": 0.88671875, "signal/frontier_coverage_1/group_std_mean": 0.2643455803394318, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026134973857551815, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026134973857551815, "signal/frontier_coverage_10/centered_abs_mean": 0.20907978415489198, "signal/frontier_coverage_10/group_bin_occupancy": 0.88671875, "signal/frontier_coverage_10/group_std_mean": 0.2643455803394318, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026134973857551815, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026134973857551815, "signal/frontier_coverage_15/centered_abs_mean": 0.20907978415489198, "signal/frontier_coverage_15/group_bin_occupancy": 0.88671875, "signal/frontier_coverage_15/group_std_mean": 0.2643455803394318, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0026134973857551815, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026134973857551815, "signal/frontier_coverage_20/centered_abs_mean": 0.20907978415489198, "signal/frontier_coverage_20/group_bin_occupancy": 0.88671875, "signal/frontier_coverage_20/group_std_mean": 0.2643455803394318, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0026134973857551815, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0026134973857551815, "signal/frontier_coverage_25/centered_abs_mean": 0.20907978415489198, "signal/frontier_coverage_25/group_bin_occupancy": 0.88671875, "signal/frontier_coverage_25/group_std_mean": 0.2643455803394318, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0026134973857551815, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0026134973857551815, "signal/frontier_coverage_5/centered_abs_mean": 0.20907978415489198, "signal/frontier_coverage_5/group_bin_occupancy": 0.88671875, "signal/frontier_coverage_5/group_std_mean": 0.2643455803394318, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026134973857551815, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026134973857551815, "signal/frontier_ece_reward/centered_abs_mean": 0.03194341510534286, "signal/frontier_ece_reward/group_bin_occupancy": 0.793359375, "signal/frontier_ece_reward/group_std_mean": 0.04675339683890343, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.003194341529160738, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.003194341529160738, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.29024515151977537, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.74296875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.36637923717498777, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.029024516791105272, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.029024516791105272, "step": 110 }, { "calibration/aurc": 0.3880178279199678, "calibration/batch_distribution_entropy": 0.976342214061056, "calibration/batch_entropy_100bins": 0.947550732636436, "calibration/batch_entropy_10bins": 0.976342214061056, "calibration/batch_entropy_50bins": 0.9675931845241565, "calibration/batch_uniqueness": 0.9630584716796875, "calibration/buffer_distribution_entropy": 0.9963760938673006, "calibration/buffer_entropy_100bins": 0.9822785458994316, "calibration/buffer_entropy_10bins": 0.9963760938673006, "calibration/buffer_entropy_50bins": 0.9900233021144178, "calibration/confidence_entropy": 0.4652751317872042, "calibration/coverage@0%": 0.00859375, "calibration/coverage@1%": 0.00859375, "calibration/coverage@10%": 0.043359375, "calibration/coverage@15%": 0.08515625, "calibration/coverage@20%": 0.165234375, "calibration/coverage@25%": 0.320703125, "calibration/coverage@30%": 0.365234375, "calibration/coverage@5%": 0.019921875, "calibration/ece": 0.1469109474352704, "calibration/mean_confidence": 0.5193943627209796, "calibration/prompt_uniqueness": 0.88134765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 970.2, "completions/max_terminated_length": 616.4, "completions/mean_length": 163.323828125, "completions/mean_terminated_length": 163.05493774414063, "completions/min_length": 70.4, "completions/min_terminated_length": 70.4, "epoch": 0.368, "grad_norm": 0.0010371602838858962, "learning_rate": 1e-06, "loss": 0.0003, "num_tokens": 382261351.0, "reward": 0.9131480097770691, "reward_std": 0.09592696875333787, "rewards/accuracy_reward": 0.5, "rewards/brier_reward": 0.771314287185669, "rewards/confidence_uniqueness_reward": 0.9649186968803406, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.0033288683742284777, "rewards/frontier_coverage_0": 0.1177740141749382, "rewards/frontier_coverage_1": 0.1177740141749382, "rewards/frontier_coverage_10": 0.1177740141749382, "rewards/frontier_coverage_15": 0.1177740141749382, "rewards/frontier_coverage_20": 0.1177740141749382, "rewards/frontier_coverage_25": 0.1177740141749382, "rewards/frontier_coverage_5": 0.1177740141749382, "rewards/frontier_ece_reward": 0.014998926036059856, "rewards/frontier_entropy_batch_reward": -0.22141122221946716, "signal/accuracy_reward/centered_abs_mean": 0.1026611328125, "signal/accuracy_reward/group_bin_occupancy": 0.1734375, "signal/accuracy_reward/group_std_mean": 0.13475327789783478, "signal/accuracy_reward/group_zero_std_frac": 0.6125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05133056640625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05133056640625, "signal/advantage_abs_mean": 0.07525163143873215, "signal/advantage_pre_scale_abs_mean": 0.07525163143873215, "signal/advantage_pre_scale_std": 0.11405473798513413, "signal/advantage_std": 0.11405473798513413, "signal/brier_reward/centered_abs_mean": 0.1544673502445221, "signal/brier_reward/group_bin_occupancy": 0.859375, "signal/brier_reward/group_std_mean": 0.19688616693019867, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015446734987199307, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.015446734987199307, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.013924498483538628, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.863671875, "signal/confidence_uniqueness_reward/group_std_mean": 0.01840968318283558, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001392449880950153, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001392449880950153, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_bin_occupancy": 0.12578125, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.003003736166283488, "signal/frontier_aurc_reward/group_bin_occupancy": 0.746484375, "signal/frontier_aurc_reward/group_std_mean": 0.004610391240566969, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.7546701423707415e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.7546701423707415e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.19367235004901887, "signal/frontier_coverage_0/group_bin_occupancy": 0.881640625, "signal/frontier_coverage_0/group_std_mean": 0.24632689356803894, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0024209044873714446, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0024209044873714446, "signal/frontier_coverage_1/centered_abs_mean": 0.19367235004901887, "signal/frontier_coverage_1/group_bin_occupancy": 0.881640625, "signal/frontier_coverage_1/group_std_mean": 0.24632689356803894, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024209044873714446, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024209044873714446, "signal/frontier_coverage_10/centered_abs_mean": 0.19367235004901887, "signal/frontier_coverage_10/group_bin_occupancy": 0.881640625, "signal/frontier_coverage_10/group_std_mean": 0.24632689356803894, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024209044873714446, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024209044873714446, "signal/frontier_coverage_15/centered_abs_mean": 0.19367235004901887, "signal/frontier_coverage_15/group_bin_occupancy": 0.881640625, "signal/frontier_coverage_15/group_std_mean": 0.24632689356803894, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024209044873714446, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024209044873714446, "signal/frontier_coverage_20/centered_abs_mean": 0.19367235004901887, "signal/frontier_coverage_20/group_bin_occupancy": 0.881640625, "signal/frontier_coverage_20/group_std_mean": 0.24632689356803894, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0024209044873714446, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0024209044873714446, "signal/frontier_coverage_25/centered_abs_mean": 0.19367235004901887, "signal/frontier_coverage_25/group_bin_occupancy": 0.881640625, "signal/frontier_coverage_25/group_std_mean": 0.24632689356803894, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0024209044873714446, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0024209044873714446, "signal/frontier_coverage_5/centered_abs_mean": 0.19367235004901887, "signal/frontier_coverage_5/group_bin_occupancy": 0.881640625, "signal/frontier_coverage_5/group_std_mean": 0.24632689356803894, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024209044873714446, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024209044873714446, "signal/frontier_ece_reward/centered_abs_mean": 0.03021877408027649, "signal/frontier_ece_reward/group_bin_occupancy": 0.82109375, "signal/frontier_ece_reward/group_std_mean": 0.042821260541677474, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0030218774918466806, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0030218774918466806, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2965745747089386, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.729296875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.37327985763549804, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.029657458886504173, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.029657458886504173, "step": 115 }, { "calibration/aurc": 0.3511573014841358, "calibration/batch_distribution_entropy": 0.9747074938964195, "calibration/batch_entropy_100bins": 0.9529573262726035, "calibration/batch_entropy_10bins": 0.9747074938964195, "calibration/batch_entropy_50bins": 0.9709856182741701, "calibration/batch_uniqueness": 0.961004638671875, "calibration/buffer_distribution_entropy": 0.9985135010971646, "calibration/buffer_entropy_100bins": 0.9882330495239049, "calibration/buffer_entropy_10bins": 0.9985135010971646, "calibration/buffer_entropy_50bins": 0.99400929049607, "calibration/confidence_entropy": 0.4714483178300538, "calibration/coverage@0%": 0.0078125, "calibration/coverage@1%": 0.0078125, "calibration/coverage@10%": 0.135546875, "calibration/coverage@15%": 0.205859375, "calibration/coverage@20%": 0.29765625, "calibration/coverage@25%": 0.34453125, "calibration/coverage@30%": 0.398828125, "calibration/coverage@5%": 0.04921875, "calibration/ece": 0.1429656466704134, "calibration/mean_confidence": 0.4671164953742596, "calibration/prompt_uniqueness": 0.870458984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 953.2, "completions/max_terminated_length": 579.2, "completions/mean_length": 162.4927734375, "completions/mean_terminated_length": 162.2256286621094, "completions/min_length": 69.4, "completions/min_terminated_length": 69.4, "epoch": 0.384, "grad_norm": 0.0010119343642145395, "learning_rate": 1e-06, "loss": 0.0009, "num_tokens": 398781789.0, "reward": 0.9290378093719482, "reward_std": 0.09455136507749558, "rewards/accuracy_reward": 0.530859375, "rewards/brier_reward": 0.7825330376625061, "rewards/confidence_uniqueness_reward": 0.9647130966186523, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.003093740437179804, "rewards/frontier_coverage_0": 0.10442648828029633, "rewards/frontier_coverage_1": 0.10442648828029633, "rewards/frontier_coverage_10": 0.10442648828029633, "rewards/frontier_coverage_15": 0.10442648828029633, "rewards/frontier_coverage_20": 0.10442648828029633, "rewards/frontier_coverage_25": 0.10442648828029633, "rewards/frontier_coverage_5": 0.10442648828029633, "rewards/frontier_ece_reward": 0.013694177567958831, "rewards/frontier_entropy_batch_reward": -0.21486915349960328, "signal/accuracy_reward/centered_abs_mean": 0.10360107421875, "signal/accuracy_reward/group_bin_occupancy": 0.1765625, "signal/accuracy_reward/group_std_mean": 0.13945001363754272, "signal/accuracy_reward/group_zero_std_frac": 0.5875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.051800537109375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.051800537109375, "signal/advantage_abs_mean": 0.07290669530630112, "signal/advantage_pre_scale_abs_mean": 0.07290669530630112, "signal/advantage_pre_scale_std": 0.11290555596351623, "signal/advantage_std": 0.11290555596351623, "signal/brier_reward/centered_abs_mean": 0.13870272636413575, "signal/brier_reward/group_bin_occupancy": 0.841796875, "signal/brier_reward/group_std_mean": 0.1802999347448349, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013870272599160672, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013870272599160672, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.013296573236584664, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.883203125, "signal/confidence_uniqueness_reward/group_std_mean": 0.017479157820343972, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013296573655679823, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013296573655679823, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_bin_occupancy": 0.12578125, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.002974971802905202, "signal/frontier_aurc_reward/group_bin_occupancy": 0.73203125, "signal/frontier_aurc_reward/group_std_mean": 0.004687594994902611, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.718714833667036e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.718714833667036e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.18035527765750886, "signal/frontier_coverage_0/group_bin_occupancy": 0.87109375, "signal/frontier_coverage_0/group_std_mean": 0.2348244309425354, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0022544410079717637, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022544410079717637, "signal/frontier_coverage_1/centered_abs_mean": 0.18035527765750886, "signal/frontier_coverage_1/group_bin_occupancy": 0.87109375, "signal/frontier_coverage_1/group_std_mean": 0.2348244309425354, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022544410079717637, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022544410079717637, "signal/frontier_coverage_10/centered_abs_mean": 0.18035527765750886, "signal/frontier_coverage_10/group_bin_occupancy": 0.87109375, "signal/frontier_coverage_10/group_std_mean": 0.2348244309425354, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022544410079717637, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022544410079717637, "signal/frontier_coverage_15/centered_abs_mean": 0.18035527765750886, "signal/frontier_coverage_15/group_bin_occupancy": 0.87109375, "signal/frontier_coverage_15/group_std_mean": 0.2348244309425354, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0022544410079717637, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022544410079717637, "signal/frontier_coverage_20/centered_abs_mean": 0.18035527765750886, "signal/frontier_coverage_20/group_bin_occupancy": 0.87109375, "signal/frontier_coverage_20/group_std_mean": 0.2348244309425354, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0022544410079717637, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0022544410079717637, "signal/frontier_coverage_25/centered_abs_mean": 0.18035527765750886, "signal/frontier_coverage_25/group_bin_occupancy": 0.87109375, "signal/frontier_coverage_25/group_std_mean": 0.2348244309425354, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0022544410079717637, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022544410079717637, "signal/frontier_coverage_5/centered_abs_mean": 0.18035527765750886, "signal/frontier_coverage_5/group_bin_occupancy": 0.87109375, "signal/frontier_coverage_5/group_std_mean": 0.2348244309425354, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0022544410079717637, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022544410079717637, "signal/frontier_ece_reward/centered_abs_mean": 0.02462676987051964, "signal/frontier_ece_reward/group_bin_occupancy": 0.832421875, "signal/frontier_ece_reward/group_std_mean": 0.03513662964105606, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002462677052244544, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002462677052244544, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.28050180673599245, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.74375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.35507087111473085, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02805018164217472, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02805018164217472, "step": 120 }, { "calibration/aurc": 0.42825535009099047, "calibration/batch_distribution_entropy": 0.9843865680153211, "calibration/batch_entropy_100bins": 0.9538506842868667, "calibration/batch_entropy_10bins": 0.9843865680153211, "calibration/batch_entropy_50bins": 0.9709620915826056, "calibration/batch_uniqueness": 0.9653472900390625, "calibration/buffer_distribution_entropy": 0.9992208380784964, "calibration/buffer_entropy_100bins": 0.9907419869998911, "calibration/buffer_entropy_10bins": 0.9992208380784964, "calibration/buffer_entropy_50bins": 0.9958122546116293, "calibration/confidence_entropy": 0.5159266248918248, "calibration/coverage@0%": 0.00390625, "calibration/coverage@1%": 0.00390625, "calibration/coverage@10%": 0.010546875, "calibration/coverage@15%": 0.016796875, "calibration/coverage@20%": 0.04375, "calibration/coverage@25%": 0.080859375, "calibration/coverage@30%": 0.129296875, "calibration/coverage@5%": 0.00390625, "calibration/ece": 0.14866836772807127, "calibration/mean_confidence": 0.512600992886363, "calibration/prompt_uniqueness": 0.88505859375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 912.2, "completions/max_terminated_length": 671.4, "completions/mean_length": 163.5146484375, "completions/mean_terminated_length": 163.24703369140624, "completions/min_length": 72.4, "completions/min_terminated_length": 72.4, "epoch": 0.4, "grad_norm": 0.0015616186428815126, "learning_rate": 1e-06, "loss": 0.0008, "num_tokens": 415492627.0, "reward": 0.9132670998573303, "reward_std": 0.10271851271390915, "rewards/accuracy_reward": 0.50390625, "rewards/brier_reward": 0.7699137806892395, "rewards/confidence_uniqueness_reward": 0.9647279858589173, "rewards/format_reward": 0.99951171875, "rewards/frontier_aurc_reward": -0.003537365049123764, "rewards/frontier_coverage_0": 0.10254341214895249, "rewards/frontier_coverage_1": 0.10254341214895249, "rewards/frontier_coverage_10": 0.10254341214895249, "rewards/frontier_coverage_15": 0.10254341214895249, "rewards/frontier_coverage_20": 0.10254341214895249, "rewards/frontier_coverage_25": 0.10254341214895249, "rewards/frontier_coverage_5": 0.10254341214895249, "rewards/frontier_ece_reward": 0.010206561535596848, "rewards/frontier_entropy_batch_reward": -0.21855055093765258, "signal/accuracy_reward/centered_abs_mean": 0.11923828125, "signal/accuracy_reward/group_bin_occupancy": 0.180859375, "signal/accuracy_reward/group_std_mean": 0.15680868923664093, "signal/accuracy_reward/group_zero_std_frac": 0.553125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.059619140625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.059619140625, "signal/advantage_abs_mean": 0.07999172508716583, "signal/advantage_pre_scale_abs_mean": 0.07999172508716583, "signal/advantage_pre_scale_std": 0.1227585643529892, "signal/advantage_std": 0.1227585643529892, "signal/brier_reward/centered_abs_mean": 0.1432872533798218, "signal/brier_reward/group_bin_occupancy": 0.874609375, "signal/brier_reward/group_std_mean": 0.1836364448070526, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014328726008534432, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014328726008534432, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.01301488820463419, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.87578125, "signal/confidence_uniqueness_reward/group_std_mean": 0.018223760277032854, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013014888390898705, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013014888390898705, "signal/format_reward/centered_abs_mean": 0.000946044921875, "signal/format_reward/group_bin_occupancy": 0.126953125, "signal/format_reward/group_std_mean": 0.0027621358167380095, "signal/format_reward/group_zero_std_frac": 0.984375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0033591561019420623, "signal/frontier_aurc_reward/group_bin_occupancy": 0.7171875, "signal/frontier_aurc_reward/group_std_mean": 0.0054647172801196575, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.1989451710833235e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.1989451710833235e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.1780136674642563, "signal/frontier_coverage_0/group_bin_occupancy": 0.8859375, "signal/frontier_coverage_0/group_std_mean": 0.2294588565826416, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0022251708433032036, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022251708433032036, "signal/frontier_coverage_1/centered_abs_mean": 0.1780136674642563, "signal/frontier_coverage_1/group_bin_occupancy": 0.8859375, "signal/frontier_coverage_1/group_std_mean": 0.2294588565826416, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022251708433032036, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022251708433032036, "signal/frontier_coverage_10/centered_abs_mean": 0.1780136674642563, "signal/frontier_coverage_10/group_bin_occupancy": 0.8859375, "signal/frontier_coverage_10/group_std_mean": 0.2294588565826416, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022251708433032036, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022251708433032036, "signal/frontier_coverage_15/centered_abs_mean": 0.1780136674642563, "signal/frontier_coverage_15/group_bin_occupancy": 0.8859375, "signal/frontier_coverage_15/group_std_mean": 0.2294588565826416, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0022251708433032036, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022251708433032036, "signal/frontier_coverage_20/centered_abs_mean": 0.1780136674642563, "signal/frontier_coverage_20/group_bin_occupancy": 0.8859375, "signal/frontier_coverage_20/group_std_mean": 0.2294588565826416, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0022251708433032036, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0022251708433032036, "signal/frontier_coverage_25/centered_abs_mean": 0.1780136674642563, "signal/frontier_coverage_25/group_bin_occupancy": 0.8859375, "signal/frontier_coverage_25/group_std_mean": 0.2294588565826416, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0022251708433032036, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022251708433032036, "signal/frontier_coverage_5/centered_abs_mean": 0.1780136674642563, "signal/frontier_coverage_5/group_bin_occupancy": 0.8859375, "signal/frontier_coverage_5/group_std_mean": 0.2294588565826416, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0022251708433032036, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022251708433032036, "signal/frontier_ece_reward/centered_abs_mean": 0.020605326071381568, "signal/frontier_ece_reward/group_bin_occupancy": 0.83828125, "signal/frontier_ece_reward/group_std_mean": 0.029997162893414496, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0020605326164513825, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0020605326164513825, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2866648018360138, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.751171875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.36136451959609983, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.028666481375694275, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.028666481375694275, "step": 125 }, { "calibration/aurc": 0.32033459954652566, "calibration/batch_distribution_entropy": 0.9757990834357446, "calibration/batch_entropy_100bins": 0.9516993202749005, "calibration/batch_entropy_10bins": 0.9757990834357446, "calibration/batch_entropy_50bins": 0.969603238359294, "calibration/batch_uniqueness": 0.964125328319047, "calibration/buffer_distribution_entropy": 0.9992577401022663, "calibration/buffer_entropy_100bins": 0.990822037253148, "calibration/buffer_entropy_10bins": 0.9992577401022663, "calibration/buffer_entropy_50bins": 0.9958939450746145, "calibration/confidence_entropy": 0.5194667423516511, "calibration/coverage@0%": 0.0027366682974559685, "calibration/coverage@1%": 0.0027366682974559685, "calibration/coverage@10%": 0.025431139921722114, "calibration/coverage@15%": 0.092284582925636, "calibration/coverage@20%": 0.14117158721365258, "calibration/coverage@25%": 0.2542489735620276, "calibration/coverage@30%": 0.43019097621925484, "calibration/coverage@5%": 0.0027366682974559685, "calibration/ece": 0.11552021771290169, "calibration/mean_confidence": 0.5210242359988329, "calibration/prompt_uniqueness": 0.8861906174575818, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00087890625, "completions/max_length": 1388.2, "completions/max_terminated_length": 1059.2, "completions/mean_length": 172.62021484375, "completions/mean_terminated_length": 171.42176208496093, "completions/min_length": 73.6, "completions/min_terminated_length": 73.6, "epoch": 0.416, "grad_norm": 0.0010845692595466971, "learning_rate": 1e-06, "loss": 0.0021, "num_tokens": 432141442.0, "reward": 0.9234651088714599, "reward_std": 0.10108603686094284, "rewards/accuracy_reward": 0.518359375, "rewards/brier_reward": 0.7800793528556824, "rewards/confidence_uniqueness_reward": 0.9633147358894348, "rewards/format_reward": 0.99912109375, "rewards/frontier_aurc_reward": -0.00315277217887342, "rewards/frontier_coverage_0": 0.10128591805696488, "rewards/frontier_coverage_1": 0.10128591805696488, "rewards/frontier_coverage_10": 0.10128591805696488, "rewards/frontier_coverage_15": 0.10128591805696488, "rewards/frontier_coverage_20": 0.10128591805696488, "rewards/frontier_coverage_25": 0.10118604749441147, "rewards/frontier_coverage_5": 0.10128591805696488, "rewards/frontier_ece_reward": 0.01040429063141346, "rewards/frontier_entropy_batch_reward": -0.1947682112455368, "signal/accuracy_reward/centered_abs_mean": 0.11812744140625, "signal/accuracy_reward/group_bin_occupancy": 0.178515625, "signal/accuracy_reward/group_std_mean": 0.15311342775821685, "signal/accuracy_reward/group_zero_std_frac": 0.571875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.059063720703125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.059063720703125, "signal/advantage_abs_mean": 0.07872487008571624, "signal/advantage_pre_scale_abs_mean": 0.07872487008571624, "signal/advantage_pre_scale_std": 0.1202880859375, "signal/advantage_std": 0.1202880859375, "signal/brier_reward/centered_abs_mean": 0.14082336127758027, "signal/brier_reward/group_bin_occupancy": 0.875, "signal/brier_reward/group_std_mean": 0.17933386862277984, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01408233605325222, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01408233605325222, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.013442078977823258, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.888671875, "signal/confidence_uniqueness_reward/group_std_mean": 0.01916816532611847, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013442079536616803, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013442079536616803, "signal/format_reward/centered_abs_mean": 0.001678466796875, "signal/format_reward/group_bin_occupancy": 0.127734375, "signal/format_reward/group_std_mean": 0.004299227613955736, "signal/format_reward/group_zero_std_frac": 0.978125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0008392333984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0008392333984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0031103747431188824, "signal/frontier_aurc_reward/group_bin_occupancy": 0.71796875, "signal/frontier_aurc_reward/group_std_mean": 0.005056559341028333, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.887968414346687e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.887968414346687e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.18912857472896577, "signal/frontier_coverage_0/group_bin_occupancy": 0.8875, "signal/frontier_coverage_0/group_std_mean": 0.23899484276771546, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002364107267931104, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002364107267931104, "signal/frontier_coverage_1/centered_abs_mean": 0.18912857472896577, "signal/frontier_coverage_1/group_bin_occupancy": 0.8875, "signal/frontier_coverage_1/group_std_mean": 0.23899484276771546, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002364107267931104, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002364107267931104, "signal/frontier_coverage_10/centered_abs_mean": 0.18912857472896577, "signal/frontier_coverage_10/group_bin_occupancy": 0.8875, "signal/frontier_coverage_10/group_std_mean": 0.23899484276771546, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002364107267931104, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002364107267931104, "signal/frontier_coverage_15/centered_abs_mean": 0.18912857472896577, "signal/frontier_coverage_15/group_bin_occupancy": 0.8875, "signal/frontier_coverage_15/group_std_mean": 0.23899484276771546, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002364107267931104, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002364107267931104, "signal/frontier_coverage_20/centered_abs_mean": 0.18912857472896577, "signal/frontier_coverage_20/group_bin_occupancy": 0.8875, "signal/frontier_coverage_20/group_std_mean": 0.23899484276771546, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002364107267931104, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002364107267931104, "signal/frontier_coverage_25/centered_abs_mean": 0.18864355981349945, "signal/frontier_coverage_25/group_bin_occupancy": 0.8875, "signal/frontier_coverage_25/group_std_mean": 0.2383899211883545, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0023580444511026146, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0023580444511026146, "signal/frontier_coverage_5/centered_abs_mean": 0.18912857472896577, "signal/frontier_coverage_5/group_bin_occupancy": 0.8875, "signal/frontier_coverage_5/group_std_mean": 0.23899484276771546, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002364107267931104, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002364107267931104, "signal/frontier_ece_reward/centered_abs_mean": 0.018303705751895903, "signal/frontier_ece_reward/group_bin_occupancy": 0.843359375, "signal/frontier_ece_reward/group_std_mean": 0.026779073104262353, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001830370631068945, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001830370631068945, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27329595685005187, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.73984375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3500793755054474, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.027329596504569054, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027329596504569054, "step": 130 }, { "calibration/aurc": 0.2711466896875542, "calibration/batch_distribution_entropy": 0.9824704911633383, "calibration/batch_entropy_100bins": 0.957537800410772, "calibration/batch_entropy_10bins": 0.9824704911633383, "calibration/batch_entropy_50bins": 0.9750580937567286, "calibration/batch_uniqueness": 0.964057967535398, "calibration/buffer_distribution_entropy": 0.9990826808437717, "calibration/buffer_entropy_100bins": 0.9903883943102378, "calibration/buffer_entropy_10bins": 0.9990826808437717, "calibration/buffer_entropy_50bins": 0.9955856525305034, "calibration/confidence_entropy": 0.4754514819609604, "calibration/coverage@0%": 0.005876225490196079, "calibration/coverage@1%": 0.005876225490196079, "calibration/coverage@10%": 0.07948835784313726, "calibration/coverage@15%": 0.22814797794117644, "calibration/coverage@20%": 0.3243229166666667, "calibration/coverage@25%": 0.42829197303921573, "calibration/coverage@30%": 0.5064721200980392, "calibration/coverage@5%": 0.024699754901960785, "calibration/ece": 0.11323366571529961, "calibration/mean_confidence": 0.5354937992420586, "calibration/prompt_uniqueness": 0.8668369638560094, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000390625, "completions/max_length": 948.8, "completions/max_terminated_length": 640.8, "completions/mean_length": 176.71396484375, "completions/mean_terminated_length": 176.18282775878907, "completions/min_length": 83.8, "completions/min_terminated_length": 83.8, "epoch": 0.432, "grad_norm": 0.0012650451390072703, "learning_rate": 1e-06, "loss": 0.0013, "num_tokens": 448965329.0, "reward": 0.9427853345870971, "reward_std": 0.09141346216201782, "rewards/accuracy_reward": 0.5541015625, "rewards/brier_reward": 0.795646071434021, "rewards/confidence_uniqueness_reward": 0.9624568223953247, "rewards/format_reward": 0.999609375, "rewards/frontier_aurc_reward": -0.002685644570738077, "rewards/frontier_coverage_0": 0.10276300571858883, "rewards/frontier_coverage_1": 0.10276300571858883, "rewards/frontier_coverage_10": 0.10276300571858883, "rewards/frontier_coverage_15": 0.10276300571858883, "rewards/frontier_coverage_20": 0.10276300571858883, "rewards/frontier_coverage_25": 0.10133399069309235, "rewards/frontier_coverage_5": 0.10276300571858883, "rewards/frontier_ece_reward": 0.012095463648438453, "rewards/frontier_entropy_batch_reward": -0.2003028452396393, "signal/accuracy_reward/centered_abs_mean": 0.10240478515625, "signal/accuracy_reward/group_bin_occupancy": 0.173046875, "signal/accuracy_reward/group_std_mean": 0.13419998735189437, "signal/accuracy_reward/group_zero_std_frac": 0.615625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.051202392578125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.051202392578125, "signal/advantage_abs_mean": 0.07144368439912796, "signal/advantage_pre_scale_abs_mean": 0.07144368439912796, "signal/advantage_pre_scale_std": 0.11116426140069961, "signal/advantage_std": 0.11116426140069961, "signal/brier_reward/centered_abs_mean": 0.1337427169084549, "signal/brier_reward/group_bin_occupancy": 0.84296875, "signal/brier_reward/group_std_mean": 0.171070197224617, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013374271430075168, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013374271430075168, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.013302310928702354, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.88203125, "signal/confidence_uniqueness_reward/group_std_mean": 0.01829577349126339, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001330231106840074, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001330231106840074, "signal/format_reward/centered_abs_mean": 0.0007568359375, "signal/format_reward/group_bin_occupancy": 0.1265625, "signal/format_reward/group_std_mean": 0.0022097086068242787, "signal/format_reward/group_zero_std_frac": 0.9875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00037841796875, "signal/frontier_aurc_reward/centered_abs_mean": 0.003123843017965555, "signal/frontier_aurc_reward/group_bin_occupancy": 0.709765625, "signal/frontier_aurc_reward/group_std_mean": 0.0052942352835088965, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.904803670593537e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.904803670593537e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.1775657594203949, "signal/frontier_coverage_0/group_bin_occupancy": 0.869140625, "signal/frontier_coverage_0/group_std_mean": 0.22634563744068145, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0022195718716830014, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022195718716830014, "signal/frontier_coverage_1/centered_abs_mean": 0.1775657594203949, "signal/frontier_coverage_1/group_bin_occupancy": 0.869140625, "signal/frontier_coverage_1/group_std_mean": 0.22634563744068145, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022195718716830014, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022195718716830014, "signal/frontier_coverage_10/centered_abs_mean": 0.1775657594203949, "signal/frontier_coverage_10/group_bin_occupancy": 0.869140625, "signal/frontier_coverage_10/group_std_mean": 0.22634563744068145, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022195718716830014, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022195718716830014, "signal/frontier_coverage_15/centered_abs_mean": 0.1775657594203949, "signal/frontier_coverage_15/group_bin_occupancy": 0.869140625, "signal/frontier_coverage_15/group_std_mean": 0.22634563744068145, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0022195718716830014, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022195718716830014, "signal/frontier_coverage_20/centered_abs_mean": 0.1775657594203949, "signal/frontier_coverage_20/group_bin_occupancy": 0.869140625, "signal/frontier_coverage_20/group_std_mean": 0.22634563744068145, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0022195718716830014, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0022195718716830014, "signal/frontier_coverage_25/centered_abs_mean": 0.17443813383579254, "signal/frontier_coverage_25/group_bin_occupancy": 0.866796875, "signal/frontier_coverage_25/group_std_mean": 0.22237459123134612, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0021804766729474068, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0021804766729474068, "signal/frontier_coverage_5/centered_abs_mean": 0.1775657594203949, "signal/frontier_coverage_5/group_bin_occupancy": 0.869140625, "signal/frontier_coverage_5/group_std_mean": 0.22634563744068145, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0022195718716830014, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022195718716830014, "signal/frontier_ece_reward/centered_abs_mean": 0.017705311998724937, "signal/frontier_ece_reward/group_bin_occupancy": 0.836328125, "signal/frontier_ece_reward/group_std_mean": 0.025692766532301903, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0017705312930047512, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0017705312930047512, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27649489641189573, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.741015625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3526521801948547, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.027649490535259245, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027649490535259245, "step": 135 }, { "calibration/aurc": 0.3010584308084431, "calibration/batch_distribution_entropy": 0.9641241091381325, "calibration/batch_entropy_100bins": 0.9505054970417863, "calibration/batch_entropy_10bins": 0.9641241091381325, "calibration/batch_entropy_50bins": 0.9653212354537498, "calibration/batch_uniqueness": 0.9634041782070479, "calibration/buffer_distribution_entropy": 0.9983903237821101, "calibration/buffer_entropy_100bins": 0.9894302549349414, "calibration/buffer_entropy_10bins": 0.9983903237821101, "calibration/buffer_entropy_50bins": 0.9949646771618358, "calibration/confidence_entropy": 0.5043917074884562, "calibration/coverage@0%": 0.00546875, "calibration/coverage@1%": 0.00546875, "calibration/coverage@10%": 0.032421875, "calibration/coverage@15%": 0.092578125, "calibration/coverage@20%": 0.1625, "calibration/coverage@25%": 0.28093428938356163, "calibration/coverage@30%": 0.4881788160469667, "calibration/coverage@5%": 0.00546875, "calibration/ece": 0.13595115217602646, "calibration/mean_confidence": 0.5649633774254619, "calibration/prompt_uniqueness": 0.8865723164347035, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00048828125, "completions/max_length": 1536.0, "completions/max_terminated_length": 563.2, "completions/mean_length": 187.29921875, "completions/mean_terminated_length": 186.64035034179688, "completions/min_length": 76.4, "completions/min_terminated_length": 76.4, "epoch": 0.448, "grad_norm": 0.00112549914047122, "learning_rate": 1e-06, "loss": 0.0014, "num_tokens": 465836073.0, "reward": 0.9279234051704407, "reward_std": 0.09372627437114715, "rewards/accuracy_reward": 0.52568359375, "rewards/brier_reward": 0.7868773221969605, "rewards/confidence_uniqueness_reward": 0.9617484331130981, "rewards/format_reward": 0.99931640625, "rewards/frontier_aurc_reward": -0.0032899423968046904, "rewards/frontier_coverage_0": 0.10912051647901536, "rewards/frontier_coverage_1": 0.10912051647901536, "rewards/frontier_coverage_10": 0.10912051647901536, "rewards/frontier_coverage_15": 0.10912051647901536, "rewards/frontier_coverage_20": 0.10912051647901536, "rewards/frontier_coverage_25": 0.10755196064710618, "rewards/frontier_coverage_5": 0.10912051647901536, "rewards/frontier_ece_reward": 0.010184999741613864, "rewards/frontier_entropy_batch_reward": -0.19944992065429687, "signal/accuracy_reward/centered_abs_mean": 0.107452392578125, "signal/accuracy_reward/group_bin_occupancy": 0.172265625, "signal/accuracy_reward/group_std_mean": 0.1377037927508354, "signal/accuracy_reward/group_zero_std_frac": 0.621875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0537261962890625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0537261962890625, "signal/advantage_abs_mean": 0.0733189657330513, "signal/advantage_pre_scale_abs_mean": 0.0733189657330513, "signal/advantage_pre_scale_std": 0.11423833519220353, "signal/advantage_std": 0.11423833519220353, "signal/brier_reward/centered_abs_mean": 0.14163122177124024, "signal/brier_reward/group_bin_occupancy": 0.85234375, "signal/brier_reward/group_std_mean": 0.18140933215618132, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01416312251240015, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01416312251240015, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.013178028725087642, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.90078125, "signal/confidence_uniqueness_reward/group_std_mean": 0.01842593662440777, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013178028631955386, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013178028631955386, "signal/format_reward/centered_abs_mean": 0.001312255859375, "signal/format_reward/group_bin_occupancy": 0.12734375, "signal/format_reward/group_std_mean": 0.0035306816920638085, "signal/format_reward/group_zero_std_frac": 0.98125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006561279296875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0006561279296875, "signal/frontier_aurc_reward/centered_abs_mean": 0.003526174183934927, "signal/frontier_aurc_reward/group_bin_occupancy": 0.690234375, "signal/frontier_aurc_reward/group_std_mean": 0.006031551398336887, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.40771778812632e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.40771778812632e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.18577166497707367, "signal/frontier_coverage_0/group_bin_occupancy": 0.88203125, "signal/frontier_coverage_0/group_std_mean": 0.23553779423236848, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0023221459705382586, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023221459705382586, "signal/frontier_coverage_1/centered_abs_mean": 0.18577166497707367, "signal/frontier_coverage_1/group_bin_occupancy": 0.88203125, "signal/frontier_coverage_1/group_std_mean": 0.23553779423236848, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023221459705382586, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023221459705382586, "signal/frontier_coverage_10/centered_abs_mean": 0.18577166497707367, "signal/frontier_coverage_10/group_bin_occupancy": 0.88203125, "signal/frontier_coverage_10/group_std_mean": 0.23553779423236848, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023221459705382586, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023221459705382586, "signal/frontier_coverage_15/centered_abs_mean": 0.18577166497707367, "signal/frontier_coverage_15/group_bin_occupancy": 0.88203125, "signal/frontier_coverage_15/group_std_mean": 0.23553779423236848, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023221459705382586, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023221459705382586, "signal/frontier_coverage_20/centered_abs_mean": 0.18577166497707367, "signal/frontier_coverage_20/group_bin_occupancy": 0.88203125, "signal/frontier_coverage_20/group_std_mean": 0.23553779423236848, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0023221459705382586, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0023221459705382586, "signal/frontier_coverage_25/centered_abs_mean": 0.17763153314590455, "signal/frontier_coverage_25/group_bin_occupancy": 0.878125, "signal/frontier_coverage_25/group_std_mean": 0.22553324997425078, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0022203943226486444, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022203943226486444, "signal/frontier_coverage_5/centered_abs_mean": 0.18577166497707367, "signal/frontier_coverage_5/group_bin_occupancy": 0.88203125, "signal/frontier_coverage_5/group_std_mean": 0.23553779423236848, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023221459705382586, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023221459705382586, "signal/frontier_ece_reward/centered_abs_mean": 0.017202311754226686, "signal/frontier_ece_reward/group_bin_occupancy": 0.82265625, "signal/frontier_ece_reward/group_std_mean": 0.025163047760725022, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0017202311893925071, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0017202311893925071, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2695195287466049, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.75234375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.34306603074073794, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026951952651143075, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026951952651143075, "step": 140 }, { "calibration/aurc": 0.3905655690563325, "calibration/batch_distribution_entropy": 0.9803796350881079, "calibration/batch_entropy_100bins": 0.960040144438605, "calibration/batch_entropy_10bins": 0.9803796350881079, "calibration/batch_entropy_50bins": 0.9728294233983587, "calibration/batch_uniqueness": 0.9602142333984375, "calibration/buffer_distribution_entropy": 0.9979704423206602, "calibration/buffer_entropy_100bins": 0.988638040636473, "calibration/buffer_entropy_10bins": 0.9979704423206602, "calibration/buffer_entropy_50bins": 0.9945346302719053, "calibration/confidence_entropy": 0.5143790373022907, "calibration/coverage@0%": 0.008984375, "calibration/coverage@1%": 0.008984375, "calibration/coverage@10%": 0.016015625, "calibration/coverage@15%": 0.03046875, "calibration/coverage@20%": 0.125, "calibration/coverage@25%": 0.183984375, "calibration/coverage@30%": 0.294921875, "calibration/coverage@5%": 0.008984375, "calibration/ece": 0.13917155590776573, "calibration/mean_confidence": 0.5079795467692557, "calibration/prompt_uniqueness": 0.87216796875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00048828125, "completions/max_length": 1347.4, "completions/max_terminated_length": 1031.8, "completions/mean_length": 191.52421875, "completions/mean_terminated_length": 190.86587219238282, "completions/min_length": 85.0, "completions/min_terminated_length": 85.0, "epoch": 0.464, "grad_norm": 0.0007277204422280192, "learning_rate": 1e-06, "loss": 0.0015, "num_tokens": 482968097.0, "reward": 0.8982602834701539, "reward_std": 0.08826989978551865, "rewards/accuracy_reward": 0.46787109375, "rewards/brier_reward": 0.7684149622917176, "rewards/confidence_uniqueness_reward": 0.959271764755249, "rewards/format_reward": 0.99951171875, "rewards/frontier_aurc_reward": -0.0039596253540366885, "rewards/frontier_coverage_0": 0.1321122795343399, "rewards/frontier_coverage_1": 0.1321122795343399, "rewards/frontier_coverage_10": 0.1321122795343399, "rewards/frontier_coverage_15": 0.1321122795343399, "rewards/frontier_coverage_20": 0.1321122795343399, "rewards/frontier_coverage_25": 0.12441358044743538, "rewards/frontier_coverage_5": 0.1321122795343399, "rewards/frontier_ece_reward": 0.007002122979611158, "rewards/frontier_entropy_batch_reward": -0.2031411647796631, "signal/accuracy_reward/centered_abs_mean": 0.085882568359375, "signal/accuracy_reward/group_bin_occupancy": 0.171875, "signal/accuracy_reward/group_std_mean": 0.1213410884141922, "signal/accuracy_reward/group_zero_std_frac": 0.625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0429412841796875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0429412841796875, "signal/advantage_abs_mean": 0.06655814126133919, "signal/advantage_pre_scale_abs_mean": 0.06655814126133919, "signal/advantage_pre_scale_std": 0.10574809014797211, "signal/advantage_std": 0.10574809014797211, "signal/brier_reward/centered_abs_mean": 0.14038530886173248, "signal/brier_reward/group_bin_occupancy": 0.861328125, "signal/brier_reward/group_std_mean": 0.1817769706249237, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014038531482219696, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014038531482219696, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.013213860616087914, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.916796875, "signal/confidence_uniqueness_reward/group_std_mean": 0.018184344843029977, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013213861035183071, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013213861035183071, "signal/format_reward/centered_abs_mean": 0.000946044921875, "signal/format_reward/group_bin_occupancy": 0.126953125, "signal/format_reward/group_std_mean": 0.002762135770171881, "signal/format_reward/group_zero_std_frac": 0.984375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0036213297862559557, "signal/frontier_aurc_reward/group_bin_occupancy": 0.691796875, "signal/frontier_aurc_reward/group_std_mean": 0.006112007796764374, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.526662451098673e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.526662451098673e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.17165872752666472, "signal/frontier_coverage_0/group_bin_occupancy": 0.884375, "signal/frontier_coverage_0/group_std_mean": 0.22206704020500184, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002145734056830406, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002145734056830406, "signal/frontier_coverage_1/centered_abs_mean": 0.17165872752666472, "signal/frontier_coverage_1/group_bin_occupancy": 0.884375, "signal/frontier_coverage_1/group_std_mean": 0.22206704020500184, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002145734056830406, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002145734056830406, "signal/frontier_coverage_10/centered_abs_mean": 0.17165872752666472, "signal/frontier_coverage_10/group_bin_occupancy": 0.884375, "signal/frontier_coverage_10/group_std_mean": 0.22206704020500184, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002145734056830406, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002145734056830406, "signal/frontier_coverage_15/centered_abs_mean": 0.17165872752666472, "signal/frontier_coverage_15/group_bin_occupancy": 0.884375, "signal/frontier_coverage_15/group_std_mean": 0.22206704020500184, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002145734056830406, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002145734056830406, "signal/frontier_coverage_20/centered_abs_mean": 0.17165872752666472, "signal/frontier_coverage_20/group_bin_occupancy": 0.884375, "signal/frontier_coverage_20/group_std_mean": 0.22206704020500184, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002145734056830406, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002145734056830406, "signal/frontier_coverage_25/centered_abs_mean": 0.16017631590366363, "signal/frontier_coverage_25/group_bin_occupancy": 0.87734375, "signal/frontier_coverage_25/group_std_mean": 0.20728962421417235, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0020022039767354726, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020022039767354726, "signal/frontier_coverage_5/centered_abs_mean": 0.17165872752666472, "signal/frontier_coverage_5/group_bin_occupancy": 0.884375, "signal/frontier_coverage_5/group_std_mean": 0.22206704020500184, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002145734056830406, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002145734056830406, "signal/frontier_ece_reward/centered_abs_mean": 0.01575020458549261, "signal/frontier_ece_reward/group_bin_occupancy": 0.818359375, "signal/frontier_ece_reward/group_std_mean": 0.02388475425541401, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0015750204911455512, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0015750204911455512, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2742986440658569, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7421875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.34781610369682314, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.027429865673184395, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027429865673184395, "step": 145 }, { "calibration/aurc": 0.2997569453896353, "calibration/batch_distribution_entropy": 0.9792118212906589, "calibration/batch_entropy_100bins": 0.9606337149903614, "calibration/batch_entropy_10bins": 0.9792118212906589, "calibration/batch_entropy_50bins": 0.9712305855926182, "calibration/batch_uniqueness": 0.9567291259765625, "calibration/buffer_distribution_entropy": 0.9978132956107622, "calibration/buffer_entropy_100bins": 0.9881803114059542, "calibration/buffer_entropy_10bins": 0.9978132956107622, "calibration/buffer_entropy_50bins": 0.9942871392551943, "calibration/confidence_entropy": 0.4884862201915487, "calibration/coverage@0%": 0.0109375, "calibration/coverage@1%": 0.0109375, "calibration/coverage@10%": 0.016015625, "calibration/coverage@15%": 0.1421875, "calibration/coverage@20%": 0.287109375, "calibration/coverage@25%": 0.38203125, "calibration/coverage@30%": 0.556640625, "calibration/coverage@5%": 0.0125, "calibration/ece": 0.13674428667727873, "calibration/mean_confidence": 0.509401226049053, "calibration/prompt_uniqueness": 0.86796875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00048828125, "completions/max_length": 1536.0, "completions/max_terminated_length": 947.6, "completions/mean_length": 194.65380859375, "completions/mean_terminated_length": 193.9985321044922, "completions/min_length": 86.8, "completions/min_terminated_length": 86.8, "epoch": 0.48, "grad_norm": 0.0011819824576377869, "learning_rate": 1e-06, "loss": 0.0015, "num_tokens": 500009384.0, "reward": 0.9241943120956421, "reward_std": 0.09635329693555832, "rewards/accuracy_reward": 0.51845703125, "rewards/brier_reward": 0.7792062282562255, "rewards/confidence_uniqueness_reward": 0.9568382143974304, "rewards/format_reward": 0.99951171875, "rewards/frontier_aurc_reward": -0.0031862443778663875, "rewards/frontier_coverage_0": 0.11428727954626083, "rewards/frontier_coverage_1": 0.11428727954626083, "rewards/frontier_coverage_10": 0.11428727954626083, "rewards/frontier_coverage_15": 0.11428727954626083, "rewards/frontier_coverage_20": 0.11371297538280487, "rewards/frontier_coverage_25": 0.1081900030374527, "rewards/frontier_coverage_5": 0.11428727954626083, "rewards/frontier_ece_reward": 0.007919181045144797, "rewards/frontier_entropy_batch_reward": -0.19063332974910735, "signal/accuracy_reward/centered_abs_mean": 0.120074462890625, "signal/accuracy_reward/group_bin_occupancy": 0.18046875, "signal/accuracy_reward/group_std_mean": 0.15685472190380095, "signal/accuracy_reward/group_zero_std_frac": 0.55625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0600372314453125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0600372314453125, "signal/advantage_abs_mean": 0.07497897297143936, "signal/advantage_pre_scale_abs_mean": 0.07497897297143936, "signal/advantage_pre_scale_std": 0.11704835444688796, "signal/advantage_std": 0.11704835444688796, "signal/brier_reward/centered_abs_mean": 0.13743520379066468, "signal/brier_reward/group_bin_occupancy": 0.838671875, "signal/brier_reward/group_std_mean": 0.17770840525627135, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013743520341813564, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013743520341813564, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.01321981344372034, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.93359375, "signal/confidence_uniqueness_reward/group_std_mean": 0.018044329062104226, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001321981381624937, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001321981381624937, "signal/format_reward/centered_abs_mean": 0.000946044921875, "signal/format_reward/group_bin_occupancy": 0.126953125, "signal/format_reward/group_std_mean": 0.0027621358167380095, "signal/format_reward/group_zero_std_frac": 0.984375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0031342420261353254, "signal/frontier_aurc_reward/group_bin_occupancy": 0.70390625, "signal/frontier_aurc_reward/group_std_mean": 0.005184091906994581, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.917802387150004e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.917802387150004e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.18790066838264466, "signal/frontier_coverage_0/group_bin_occupancy": 0.86484375, "signal/frontier_coverage_0/group_std_mean": 0.24077147245407104, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0023487584665417673, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023487584665417673, "signal/frontier_coverage_1/centered_abs_mean": 0.18790066838264466, "signal/frontier_coverage_1/group_bin_occupancy": 0.86484375, "signal/frontier_coverage_1/group_std_mean": 0.24077147245407104, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023487584665417673, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023487584665417673, "signal/frontier_coverage_10/centered_abs_mean": 0.18790066838264466, "signal/frontier_coverage_10/group_bin_occupancy": 0.86484375, "signal/frontier_coverage_10/group_std_mean": 0.24077147245407104, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023487584665417673, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023487584665417673, "signal/frontier_coverage_15/centered_abs_mean": 0.18790066838264466, "signal/frontier_coverage_15/group_bin_occupancy": 0.86484375, "signal/frontier_coverage_15/group_std_mean": 0.24077147245407104, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023487584665417673, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023487584665417673, "signal/frontier_coverage_20/centered_abs_mean": 0.18661079108715056, "signal/frontier_coverage_20/group_bin_occupancy": 0.8640625, "signal/frontier_coverage_20/group_std_mean": 0.23916022181510926, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002332634944468737, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002332634944468737, "signal/frontier_coverage_25/centered_abs_mean": 0.17039817869663237, "signal/frontier_coverage_25/group_bin_occupancy": 0.858203125, "signal/frontier_coverage_25/group_std_mean": 0.21902235150337218, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0021299772663041948, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0021299772663041948, "signal/frontier_coverage_5/centered_abs_mean": 0.18790066838264466, "signal/frontier_coverage_5/group_bin_occupancy": 0.86484375, "signal/frontier_coverage_5/group_std_mean": 0.24077147245407104, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023487584665417673, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023487584665417673, "signal/frontier_ece_reward/centered_abs_mean": 0.014475966058671474, "signal/frontier_ece_reward/group_bin_occupancy": 0.822265625, "signal/frontier_ece_reward/group_std_mean": 0.022006630897521973, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0014475966105237602, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0014475966105237602, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26103139519691465, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.746484375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3364805102348328, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02610314004123211, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02610314004123211, "step": 150 }, { "epoch": 0.48, "eval_calibration/aurc": 0.4928207956337317, "eval_calibration/batch_distribution_entropy": 0.9448460828266618, "eval_calibration/batch_entropy_100bins": 0.7140585686804589, "eval_calibration/batch_entropy_10bins": 0.9448460828266618, "eval_calibration/batch_entropy_50bins": 0.7938259760149764, "eval_calibration/batch_uniqueness": 0.9052734375, "eval_calibration/buffer_distribution_entropy": 0.9977152222728605, "eval_calibration/buffer_entropy_100bins": 0.9880421883831635, "eval_calibration/buffer_entropy_10bins": 0.9977152222728605, "eval_calibration/buffer_entropy_50bins": 0.9941686668937713, "eval_calibration/confidence_entropy": 0.4849995680216249, "eval_calibration/coverage@0%": 0.046875, "eval_calibration/coverage@1%": 0.046875, "eval_calibration/coverage@10%": 0.046875, "eval_calibration/coverage@15%": 0.046875, "eval_calibration/coverage@20%": 0.09375, "eval_calibration/coverage@25%": 0.1484375, "eval_calibration/coverage@30%": 0.1484375, "eval_calibration/coverage@5%": 0.046875, "eval_calibration/ece": 0.22999326971199952, "eval_calibration/mean_confidence": 0.4614743513558307, "eval_calibration/prompt_uniqueness": 0.9052734375, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 434.5, "eval_completions/max_terminated_length": 434.5, "eval_completions/mean_length": 194.3330421447754, "eval_completions/mean_terminated_length": 194.3330421447754, "eval_completions/min_length": 98.0, "eval_completions/min_terminated_length": 98.0, "eval_loss": 0.0, "eval_num_tokens": 500009384.0, "eval_reward": 0.7936547994613647, "eval_reward_std": 0.2236923649907112, "eval_rewards/accuracy_reward": 0.416015625, "eval_rewards/brier_reward": 0.7854552268981934, "eval_rewards/confidence_uniqueness_reward": 0.904296875, "eval_rewards/format_reward": 1.0, "eval_rewards/frontier_aurc_reward": -0.0038128122105263174, "eval_rewards/frontier_coverage_0": 0.18677300214767456, "eval_rewards/frontier_coverage_1": 0.18677300214767456, "eval_rewards/frontier_coverage_10": 0.18677300214767456, "eval_rewards/frontier_coverage_15": 0.18677300214767456, "eval_rewards/frontier_coverage_20": 0.18603158369660378, "eval_rewards/frontier_coverage_25": 0.1658840924501419, "eval_rewards/frontier_coverage_5": 0.18677300214767456, "eval_rewards/frontier_ece_reward": 0.0064718994544819, "eval_rewards/frontier_entropy_batch_reward": -1.0, "eval_runtime": 22.8621, "eval_samples_per_second": 21.87, "eval_signal/accuracy_reward/centered_abs_mean": 0.4705810546875, "eval_signal/accuracy_reward/group_bin_occupancy": 0.25, "eval_signal/accuracy_reward/group_std_mean": 0.49238111078739166, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23529052734375, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23529052734375, "eval_signal/advantage_abs_mean": 0.20876647159457207, "eval_signal/advantage_pre_scale_abs_mean": 0.20876647159457207, "eval_signal/advantage_pre_scale_std": 0.22128642722964287, "eval_signal/advantage_std": 0.22128642722964287, "eval_signal/brier_reward/centered_abs_mean": 0.1912023350596428, "eval_signal/brier_reward/group_bin_occupancy": 0.890625, "eval_signal/brier_reward/group_std_mean": 0.2429308146238327, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019120234064757824, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.019120234064757824, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0374603271484375, "eval_signal/confidence_uniqueness_reward/group_bin_occupancy": 0.390625, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.04364745691418648, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003746032773051411, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003746032773051411, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_bin_occupancy": 0.125, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.004567834781482816, "eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.6640625, "eval_signal/frontier_aurc_reward/group_std_mean": 0.008484951569698751, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.7097938224615064e-05, "eval_signal/frontier_aurc_reward/weight": 0.012500000186264515, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.7097938224615064e-05, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.35640130937099457, "eval_signal/frontier_coverage_0/group_bin_occupancy": 0.953125, "eval_signal/frontier_coverage_0/group_std_mean": 0.4366024136543274, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0044550164602696896, "eval_signal/frontier_coverage_0/weight": 0.012500000186264515, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0044550164602696896, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.35640130937099457, "eval_signal/frontier_coverage_1/group_bin_occupancy": 0.953125, "eval_signal/frontier_coverage_1/group_std_mean": 0.4366024136543274, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0044550164602696896, "eval_signal/frontier_coverage_1/weight": 0.012500000186264515, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0044550164602696896, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.35640130937099457, "eval_signal/frontier_coverage_10/group_bin_occupancy": 0.953125, "eval_signal/frontier_coverage_10/group_std_mean": 0.4366024136543274, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0044550164602696896, "eval_signal/frontier_coverage_10/weight": 0.012500000186264515, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0044550164602696896, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.35640130937099457, "eval_signal/frontier_coverage_15/group_bin_occupancy": 0.953125, "eval_signal/frontier_coverage_15/group_std_mean": 0.4366024136543274, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0044550164602696896, "eval_signal/frontier_coverage_15/weight": 0.012500000186264515, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0044550164602696896, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.35413555800914764, "eval_signal/frontier_coverage_20/group_bin_occupancy": 0.953125, "eval_signal/frontier_coverage_20/group_std_mean": 0.4339291825890541, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0044266944751143456, "eval_signal/frontier_coverage_20/weight": 0.012500000186264515, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0044266944751143456, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.31332943588495255, "eval_signal/frontier_coverage_25/group_bin_occupancy": 0.9609375, "eval_signal/frontier_coverage_25/group_std_mean": 0.3852032795548439, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003916618006769568, "eval_signal/frontier_coverage_25/weight": 0.012500000186264515, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003916618006769568, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.35640130937099457, "eval_signal/frontier_coverage_5/group_bin_occupancy": 0.953125, "eval_signal/frontier_coverage_5/group_std_mean": 0.4366024136543274, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0044550164602696896, "eval_signal/frontier_coverage_5/weight": 0.012500000186264515, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0044550164602696896, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.013189757708460093, "eval_signal/frontier_ece_reward/group_bin_occupancy": 0.90625, "eval_signal/frontier_ece_reward/group_std_mean": 0.018085308838635683, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0013189757883083075, "eval_signal/frontier_ece_reward/weight": 0.10000000149011612, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0013189757883083075, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.125, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0, "eval_steps_per_second": 0.175, "step": 150 }, { "calibration/aurc": 0.3905210632456232, "calibration/batch_distribution_entropy": 0.9905343495592323, "calibration/batch_entropy_100bins": 0.9699764400711286, "calibration/batch_entropy_10bins": 0.9905343495592323, "calibration/batch_entropy_50bins": 0.97994022779757, "calibration/batch_uniqueness": 0.9584047876376637, "calibration/buffer_distribution_entropy": 0.9977835093627524, "calibration/buffer_entropy_100bins": 0.9880724924738754, "calibration/buffer_entropy_10bins": 0.9977835093627524, "calibration/buffer_entropy_50bins": 0.9941551940853858, "calibration/confidence_entropy": 0.500662742867437, "calibration/coverage@0%": 0.005078889432485323, "calibration/coverage@1%": 0.005078889432485323, "calibration/coverage@10%": 0.08125076443248533, "calibration/coverage@15%": 0.1417976394324853, "calibration/coverage@20%": 0.1953132644324853, "calibration/coverage@25%": 0.2523613319471624, "calibration/coverage@30%": 0.29494786570450093, "calibration/coverage@5%": 0.03828201443248532, "calibration/ece": 0.13589736223677878, "calibration/mean_confidence": 0.49380390196528123, "calibration/prompt_uniqueness": 0.8630719648315557, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0005859375, "completions/max_length": 1201.8, "completions/max_terminated_length": 791.8, "completions/mean_length": 193.1357421875, "completions/mean_terminated_length": 192.3479034423828, "completions/min_length": 88.4, "completions/min_terminated_length": 88.4, "epoch": 0.496, "grad_norm": 0.0008443639962933958, "learning_rate": 1e-06, "loss": 0.0025, "num_tokens": 517294934.0, "reward": 0.9380232334136963, "reward_std": 0.08917539864778519, "rewards/accuracy_reward": 0.547265625, "rewards/brier_reward": 0.7819605112075806, "rewards/confidence_uniqueness_reward": 0.956698739528656, "rewards/format_reward": 0.9994140625, "rewards/frontier_aurc_reward": -0.002871061023324728, "rewards/frontier_coverage_0": 0.09036671817302704, "rewards/frontier_coverage_1": 0.09036671817302704, "rewards/frontier_coverage_10": 0.09036671817302704, "rewards/frontier_coverage_15": 0.09036671817302704, "rewards/frontier_coverage_20": 0.08979679197072983, "rewards/frontier_coverage_25": 0.075638347864151, "rewards/frontier_coverage_5": 0.09036671817302704, "rewards/frontier_ece_reward": 0.006273471284657717, "rewards/frontier_entropy_batch_reward": -0.17489843368530272, "signal/accuracy_reward/centered_abs_mean": 0.0958251953125, "signal/accuracy_reward/group_bin_occupancy": 0.173828125, "signal/accuracy_reward/group_std_mean": 0.13047634959220886, "signal/accuracy_reward/group_zero_std_frac": 0.609375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04791259765625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04791259765625, "signal/advantage_abs_mean": 0.06811611354351044, "signal/advantage_pre_scale_abs_mean": 0.06811611354351044, "signal/advantage_pre_scale_std": 0.10838208794593811, "signal/advantage_std": 0.10838208794593811, "signal/brier_reward/centered_abs_mean": 0.13192115724086761, "signal/brier_reward/group_bin_occupancy": 0.869140625, "signal/brier_reward/group_std_mean": 0.168493589758873, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013192116282880306, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013192116282880306, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012967484071850777, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.9390625, "signal/confidence_uniqueness_reward/group_std_mean": 0.018038667924702167, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001296748430468142, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001296748430468142, "signal/format_reward/centered_abs_mean": 0.00113525390625, "signal/format_reward/group_bin_occupancy": 0.12734375, "signal/format_reward/group_std_mean": 0.0033145629335194827, "signal/format_reward/group_zero_std_frac": 0.98125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000567626953125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000567626953125, "signal/frontier_aurc_reward/centered_abs_mean": 0.0028141734655946493, "signal/frontier_aurc_reward/group_bin_occupancy": 0.696484375, "signal/frontier_aurc_reward/group_std_mean": 0.004710181429982185, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.517716831993312e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.517716831993312e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.16962937116622925, "signal/frontier_coverage_0/group_bin_occupancy": 0.88203125, "signal/frontier_coverage_0/group_std_mean": 0.2177934467792511, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002120367041788995, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002120367041788995, "signal/frontier_coverage_1/centered_abs_mean": 0.16962937116622925, "signal/frontier_coverage_1/group_bin_occupancy": 0.88203125, "signal/frontier_coverage_1/group_std_mean": 0.2177934467792511, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002120367041788995, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002120367041788995, "signal/frontier_coverage_10/centered_abs_mean": 0.16962937116622925, "signal/frontier_coverage_10/group_bin_occupancy": 0.88203125, "signal/frontier_coverage_10/group_std_mean": 0.2177934467792511, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002120367041788995, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002120367041788995, "signal/frontier_coverage_15/centered_abs_mean": 0.16962937116622925, "signal/frontier_coverage_15/group_bin_occupancy": 0.88203125, "signal/frontier_coverage_15/group_std_mean": 0.2177934467792511, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002120367041788995, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002120367041788995, "signal/frontier_coverage_20/centered_abs_mean": 0.16856757402420045, "signal/frontier_coverage_20/group_bin_occupancy": 0.881640625, "signal/frontier_coverage_20/group_std_mean": 0.2164437383413315, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0021070946007966996, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0021070946007966996, "signal/frontier_coverage_25/centered_abs_mean": 0.14662111103534697, "signal/frontier_coverage_25/group_bin_occupancy": 0.876953125, "signal/frontier_coverage_25/group_std_mean": 0.1890866458415985, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001832763897255063, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001832763897255063, "signal/frontier_coverage_5/centered_abs_mean": 0.16962937116622925, "signal/frontier_coverage_5/group_bin_occupancy": 0.88203125, "signal/frontier_coverage_5/group_std_mean": 0.2177934467792511, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002120367041788995, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002120367041788995, "signal/frontier_ece_reward/centered_abs_mean": 0.011038328520953655, "signal/frontier_ece_reward/group_bin_occupancy": 0.86796875, "signal/frontier_ece_reward/group_std_mean": 0.014819971285760403, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0011038328986614943, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0011038328986614943, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.256817501783371, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.73515625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3358631134033203, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.025681750476360322, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.025681750476360322, "step": 155 }, { "calibration/aurc": 0.31790763537690625, "calibration/batch_distribution_entropy": 0.9855522887040239, "calibration/batch_entropy_100bins": 0.9685646417212451, "calibration/batch_entropy_10bins": 0.9855522887040239, "calibration/batch_entropy_50bins": 0.9776787715207496, "calibration/batch_uniqueness": 0.9575121754105227, "calibration/buffer_distribution_entropy": 0.9980551921710852, "calibration/buffer_entropy_100bins": 0.9885994859100894, "calibration/buffer_entropy_10bins": 0.9980551921710852, "calibration/buffer_entropy_50bins": 0.9943812246542099, "calibration/confidence_entropy": 0.5099366283838993, "calibration/coverage@0%": 0.023481837084148728, "calibration/coverage@1%": 0.023481837084148728, "calibration/coverage@10%": 0.19366820572407045, "calibration/coverage@15%": 0.3277481347847358, "calibration/coverage@20%": 0.3950319532778865, "calibration/coverage@25%": 0.4333506604696673, "calibration/coverage@30%": 0.4865199975538161, "calibration/coverage@5%": 0.07631941046966731, "calibration/ece": 0.137644847278309, "calibration/mean_confidence": 0.5067424365660879, "calibration/prompt_uniqueness": 0.870397403014438, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 1051.6, "completions/max_terminated_length": 664.6, "completions/mean_length": 187.24072265625, "completions/mean_terminated_length": 186.84423217773437, "completions/min_length": 82.6, "completions/min_terminated_length": 82.6, "epoch": 0.512, "grad_norm": 0.0013515661703422666, "learning_rate": 1e-06, "loss": 0.001, "num_tokens": 534357943.0, "reward": 0.9389643549919129, "reward_std": 0.08940067738294602, "rewards/accuracy_reward": 0.5443359375, "rewards/brier_reward": 0.8010304689407348, "rewards/confidence_uniqueness_reward": 0.9571277260780334, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.002591008087620139, "rewards/frontier_coverage_0": 0.11089163199067116, "rewards/frontier_coverage_1": 0.11089163199067116, "rewards/frontier_coverage_10": 0.11089163199067116, "rewards/frontier_coverage_15": 0.11089163199067116, "rewards/frontier_coverage_20": 0.11040212810039521, "rewards/frontier_coverage_25": 0.0975722998380661, "rewards/frontier_coverage_5": 0.11089163199067116, "rewards/frontier_ece_reward": 0.007399659510701895, "rewards/frontier_entropy_batch_reward": -0.19110932052135468, "signal/accuracy_reward/centered_abs_mean": 0.09991455078125, "signal/accuracy_reward/group_bin_occupancy": 0.173828125, "signal/accuracy_reward/group_std_mean": 0.13368143737316132, "signal/accuracy_reward/group_zero_std_frac": 0.609375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.049957275390625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.049957275390625, "signal/advantage_abs_mean": 0.0693469613790512, "signal/advantage_pre_scale_abs_mean": 0.0693469613790512, "signal/advantage_pre_scale_std": 0.10971838235855103, "signal/advantage_std": 0.10971838235855103, "signal/brier_reward/centered_abs_mean": 0.12356914579868317, "signal/brier_reward/group_bin_occupancy": 0.84921875, "signal/brier_reward/group_std_mean": 0.16091051399707795, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012356914579868317, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012356914579868317, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.011988498829305172, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.946484375, "signal/confidence_uniqueness_reward/group_std_mean": 0.015966850332915783, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0011988498736172915, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0011988498736172915, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_bin_occupancy": 0.126171875, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.0028497665654867886, "signal/frontier_aurc_reward/group_bin_occupancy": 0.707421875, "signal/frontier_aurc_reward/group_std_mean": 0.004724315833300352, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.562208294169977e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.562208294169977e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.1604818731546402, "signal/frontier_coverage_0/group_bin_occupancy": 0.8640625, "signal/frontier_coverage_0/group_std_mean": 0.21088581383228303, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0020060235168784858, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0020060235168784858, "signal/frontier_coverage_1/centered_abs_mean": 0.1604818731546402, "signal/frontier_coverage_1/group_bin_occupancy": 0.8640625, "signal/frontier_coverage_1/group_std_mean": 0.21088581383228303, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0020060235168784858, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020060235168784858, "signal/frontier_coverage_10/centered_abs_mean": 0.1604818731546402, "signal/frontier_coverage_10/group_bin_occupancy": 0.8640625, "signal/frontier_coverage_10/group_std_mean": 0.21088581383228303, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0020060235168784858, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020060235168784858, "signal/frontier_coverage_15/centered_abs_mean": 0.1604818731546402, "signal/frontier_coverage_15/group_bin_occupancy": 0.8640625, "signal/frontier_coverage_15/group_std_mean": 0.21088581383228303, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0020060235168784858, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0020060235168784858, "signal/frontier_coverage_20/centered_abs_mean": 0.15974161326885222, "signal/frontier_coverage_20/group_bin_occupancy": 0.86328125, "signal/frontier_coverage_20/group_std_mean": 0.2099863260984421, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00199677012860775, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00199677012860775, "signal/frontier_coverage_25/centered_abs_mean": 0.1347096398472786, "signal/frontier_coverage_25/group_bin_occupancy": 0.8546875, "signal/frontier_coverage_25/group_std_mean": 0.17761588990688323, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0016838705167174339, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016838705167174339, "signal/frontier_coverage_5/centered_abs_mean": 0.1604818731546402, "signal/frontier_coverage_5/group_bin_occupancy": 0.8640625, "signal/frontier_coverage_5/group_std_mean": 0.21088581383228303, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0020060235168784858, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020060235168784858, "signal/frontier_ece_reward/centered_abs_mean": 0.010506413504481315, "signal/frontier_ece_reward/group_bin_occupancy": 0.89140625, "signal/frontier_ece_reward/group_std_mean": 0.01373392753303051, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010506413877010346, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010506413877010346, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2652657926082611, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.734375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.342242556810379, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02652658075094223, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02652658075094223, "step": 160 }, { "calibration/aurc": 0.2116557679486677, "calibration/batch_distribution_entropy": 0.9857721529055761, "calibration/batch_entropy_100bins": 0.9656648820603996, "calibration/batch_entropy_10bins": 0.9857721529055761, "calibration/batch_entropy_50bins": 0.9778444274441169, "calibration/batch_uniqueness": 0.9589366543250588, "calibration/buffer_distribution_entropy": 0.9982783670040604, "calibration/buffer_entropy_100bins": 0.9890758203614105, "calibration/buffer_entropy_10bins": 0.9982783670040604, "calibration/buffer_entropy_50bins": 0.9945921630733402, "calibration/confidence_entropy": 0.47936185596729713, "calibration/coverage@0%": 0.03830418297455969, "calibration/coverage@1%": 0.03830418297455969, "calibration/coverage@10%": 0.27363090141878665, "calibration/coverage@15%": 0.34475752201565557, "calibration/coverage@20%": 0.5264394263698631, "calibration/coverage@25%": 0.616313753669276, "calibration/coverage@30%": 0.7604887781311154, "calibration/coverage@5%": 0.17943141511741684, "calibration/ece": 0.11594575333235538, "calibration/mean_confidence": 0.5187620094786958, "calibration/prompt_uniqueness": 0.8553659111602497, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 1092.6, "completions/max_terminated_length": 778.2, "completions/mean_length": 187.25498046875, "completions/mean_terminated_length": 186.8594207763672, "completions/min_length": 85.4, "completions/min_terminated_length": 85.4, "epoch": 0.528, "grad_norm": 0.0011601398000493646, "learning_rate": 1e-06, "loss": 0.0013, "num_tokens": 551304970.0, "reward": 0.9393844962120056, "reward_std": 0.08786453604698181, "rewards/accuracy_reward": 0.54228515625, "rewards/brier_reward": 0.8001392245292663, "rewards/confidence_uniqueness_reward": 0.9570415496826172, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.0026703955605626105, "rewards/frontier_coverage_0": 0.12262420728802681, "rewards/frontier_coverage_1": 0.12262420728802681, "rewards/frontier_coverage_10": 0.12262420728802681, "rewards/frontier_coverage_15": 0.12262420728802681, "rewards/frontier_coverage_20": 0.12122518271207809, "rewards/frontier_coverage_25": 0.10408189445734024, "rewards/frontier_coverage_5": 0.12262420728802681, "rewards/frontier_ece_reward": 0.008721418399363755, "rewards/frontier_entropy_batch_reward": -0.18648791313171387, "signal/accuracy_reward/centered_abs_mean": 0.104351806640625, "signal/accuracy_reward/group_bin_occupancy": 0.175, "signal/accuracy_reward/group_std_mean": 0.1387265920639038, "signal/accuracy_reward/group_zero_std_frac": 0.6, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0521759033203125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0521759033203125, "signal/advantage_abs_mean": 0.06842133551836013, "signal/advantage_pre_scale_abs_mean": 0.06842133551836013, "signal/advantage_pre_scale_std": 0.1080582544207573, "signal/advantage_std": 0.1080582544207573, "signal/brier_reward/centered_abs_mean": 0.12543393820524215, "signal/brier_reward/group_bin_occupancy": 0.84765625, "signal/brier_reward/group_std_mean": 0.16112754940986634, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012543394230306149, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012543394230306149, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.011928396113216878, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.95234375, "signal/confidence_uniqueness_reward/group_std_mean": 0.01579369381070137, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0011928396532312035, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0011928396532312035, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_bin_occupancy": 0.126171875, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.0027430617716163396, "signal/frontier_aurc_reward/group_bin_occupancy": 0.714453125, "signal/frontier_aurc_reward/group_std_mean": 0.004478739900514483, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.4288274036953226e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.4288274036953226e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.17456578612327575, "signal/frontier_coverage_0/group_bin_occupancy": 0.864453125, "signal/frontier_coverage_0/group_std_mean": 0.22599020898342131, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0021820723544806243, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021820723544806243, "signal/frontier_coverage_1/centered_abs_mean": 0.17456578612327575, "signal/frontier_coverage_1/group_bin_occupancy": 0.864453125, "signal/frontier_coverage_1/group_std_mean": 0.22599020898342131, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0021820723544806243, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021820723544806243, "signal/frontier_coverage_10/centered_abs_mean": 0.17456578612327575, "signal/frontier_coverage_10/group_bin_occupancy": 0.864453125, "signal/frontier_coverage_10/group_std_mean": 0.22599020898342131, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0021820723544806243, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0021820723544806243, "signal/frontier_coverage_15/centered_abs_mean": 0.17456578612327575, "signal/frontier_coverage_15/group_bin_occupancy": 0.864453125, "signal/frontier_coverage_15/group_std_mean": 0.22599020898342131, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0021820723544806243, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0021820723544806243, "signal/frontier_coverage_20/centered_abs_mean": 0.17225814163684844, "signal/frontier_coverage_20/group_bin_occupancy": 0.8625, "signal/frontier_coverage_20/group_std_mean": 0.22308208048343658, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0021532268263399603, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0021532268263399603, "signal/frontier_coverage_25/centered_abs_mean": 0.139526429772377, "signal/frontier_coverage_25/group_bin_occupancy": 0.85390625, "signal/frontier_coverage_25/group_std_mean": 0.1816743493080139, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0017440804746001958, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017440804746001958, "signal/frontier_coverage_5/centered_abs_mean": 0.17456578612327575, "signal/frontier_coverage_5/group_bin_occupancy": 0.864453125, "signal/frontier_coverage_5/group_std_mean": 0.22599020898342131, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0021820723544806243, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021820723544806243, "signal/frontier_ece_reward/centered_abs_mean": 0.013731100969016552, "signal/frontier_ece_reward/group_bin_occupancy": 0.84921875, "signal/frontier_ece_reward/group_std_mean": 0.022244062460958957, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0013731101527810097, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0013731101527810097, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.25302750468254087, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.729296875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3287863492965698, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02530275024473667, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02530275024473667, "step": 165 }, { "calibration/aurc": 0.22570365505708892, "calibration/batch_distribution_entropy": 0.9843955404941772, "calibration/batch_entropy_100bins": 0.9662716763828755, "calibration/batch_entropy_10bins": 0.9843955404941772, "calibration/batch_entropy_50bins": 0.9777331174175243, "calibration/batch_uniqueness": 0.959429931640625, "calibration/buffer_distribution_entropy": 0.9982544609568785, "calibration/buffer_entropy_100bins": 0.9893260796188164, "calibration/buffer_entropy_10bins": 0.9982544609568785, "calibration/buffer_entropy_50bins": 0.9945346311546374, "calibration/confidence_entropy": 0.4794441883496332, "calibration/coverage@0%": 0.021875, "calibration/coverage@1%": 0.05, "calibration/coverage@10%": 0.196875, "calibration/coverage@15%": 0.2703125, "calibration/coverage@20%": 0.538671875, "calibration/coverage@25%": 0.646484375, "calibration/coverage@30%": 0.73046875, "calibration/coverage@5%": 0.11171875, "calibration/ece": 0.0953630593548969, "calibration/mean_confidence": 0.5133342115984929, "calibration/prompt_uniqueness": 0.8658203125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00048828125, "completions/max_length": 1536.0, "completions/max_terminated_length": 717.8, "completions/mean_length": 187.49580078125, "completions/mean_terminated_length": 186.83703002929687, "completions/min_length": 81.2, "completions/min_terminated_length": 81.2, "epoch": 0.544, "grad_norm": 0.0014953252393752337, "learning_rate": 1e-06, "loss": 0.0019, "num_tokens": 568388511.0, "reward": 0.9478225350379944, "reward_std": 0.09547350853681565, "rewards/accuracy_reward": 0.5732421875, "rewards/brier_reward": 0.7815747022628784, "rewards/confidence_uniqueness_reward": 0.957908034324646, "rewards/format_reward": 0.99951171875, "rewards/frontier_aurc_reward": -0.0027830671519041062, "rewards/frontier_coverage_0": 0.07353707253932953, "rewards/frontier_coverage_1": 0.07353707253932953, "rewards/frontier_coverage_10": 0.07353707253932953, "rewards/frontier_coverage_15": 0.07353707253932953, "rewards/frontier_coverage_20": 0.07318145632743836, "rewards/frontier_coverage_25": 0.058940806239843366, "rewards/frontier_coverage_5": 0.07353707253932953, "rewards/frontier_ece_reward": 0.0069223855622112754, "rewards/frontier_entropy_batch_reward": -0.19407747387886048, "signal/accuracy_reward/centered_abs_mean": 0.1157470703125, "signal/accuracy_reward/group_bin_occupancy": 0.182421875, "signal/accuracy_reward/group_std_mean": 0.15591520071029663, "signal/accuracy_reward/group_zero_std_frac": 0.540625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05787353515625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05787353515625, "signal/advantage_abs_mean": 0.07254650890827179, "signal/advantage_pre_scale_abs_mean": 0.07254650890827179, "signal/advantage_pre_scale_std": 0.11336593627929688, "signal/advantage_std": 0.11336593627929688, "signal/brier_reward/centered_abs_mean": 0.13599575757980348, "signal/brier_reward/group_bin_occupancy": 0.855859375, "signal/brier_reward/group_std_mean": 0.1739354431629181, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01359957605600357, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01359957605600357, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012918978370726109, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.931640625, "signal/confidence_uniqueness_reward/group_std_mean": 0.017537206411361694, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012918978696689009, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012918978696689009, "signal/format_reward/centered_abs_mean": 0.000946044921875, "signal/format_reward/group_bin_occupancy": 0.126953125, "signal/format_reward/group_std_mean": 0.0027621358167380095, "signal/format_reward/group_zero_std_frac": 0.984375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0028809635899960996, "signal/frontier_aurc_reward/group_bin_occupancy": 0.698046875, "signal/frontier_aurc_reward/group_std_mean": 0.004857636988162994, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.601204407459591e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.601204407459591e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.1849408507347107, "signal/frontier_coverage_0/group_bin_occupancy": 0.86796875, "signal/frontier_coverage_0/group_std_mean": 0.2365315616130829, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002311760699376464, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002311760699376464, "signal/frontier_coverage_1/centered_abs_mean": 0.1849408507347107, "signal/frontier_coverage_1/group_bin_occupancy": 0.86796875, "signal/frontier_coverage_1/group_std_mean": 0.2365315616130829, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002311760699376464, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002311760699376464, "signal/frontier_coverage_10/centered_abs_mean": 0.1849408507347107, "signal/frontier_coverage_10/group_bin_occupancy": 0.86796875, "signal/frontier_coverage_10/group_std_mean": 0.2365315616130829, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002311760699376464, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002311760699376464, "signal/frontier_coverage_15/centered_abs_mean": 0.1849408507347107, "signal/frontier_coverage_15/group_bin_occupancy": 0.86796875, "signal/frontier_coverage_15/group_std_mean": 0.2365315616130829, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002311760699376464, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002311760699376464, "signal/frontier_coverage_20/centered_abs_mean": 0.18298504054546355, "signal/frontier_coverage_20/group_bin_occupancy": 0.86875, "signal/frontier_coverage_20/group_std_mean": 0.2340652674436569, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0022873131558299063, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0022873131558299063, "signal/frontier_coverage_25/centered_abs_mean": 0.13958741277456282, "signal/frontier_coverage_25/group_bin_occupancy": 0.859375, "signal/frontier_coverage_25/group_std_mean": 0.1793098896741867, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0017448426457121967, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017448426457121967, "signal/frontier_coverage_5/centered_abs_mean": 0.1849408507347107, "signal/frontier_coverage_5/group_bin_occupancy": 0.86796875, "signal/frontier_coverage_5/group_std_mean": 0.2365315616130829, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002311760699376464, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002311760699376464, "signal/frontier_ece_reward/centered_abs_mean": 0.012716376781463623, "signal/frontier_ece_reward/group_bin_occupancy": 0.83515625, "signal/frontier_ece_reward/group_std_mean": 0.021388059109449388, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0012716377153992654, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0012716377153992654, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2714007079601288, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.748046875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.34507684111595155, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.027140070497989655, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027140070497989655, "step": 170 }, { "calibration/aurc": 0.2630419824140453, "calibration/batch_distribution_entropy": 0.9855633289125907, "calibration/batch_entropy_100bins": 0.9657104346168568, "calibration/batch_entropy_10bins": 0.9855633289125907, "calibration/batch_entropy_50bins": 0.9780025735821166, "calibration/batch_uniqueness": 0.9603877337790866, "calibration/buffer_distribution_entropy": 0.9981646752348159, "calibration/buffer_entropy_100bins": 0.9893824626518759, "calibration/buffer_entropy_10bins": 0.9981646752348159, "calibration/buffer_entropy_50bins": 0.9944247124897367, "calibration/confidence_entropy": 0.49413471824349064, "calibration/coverage@0%": 0.011331182729941292, "calibration/coverage@1%": 0.0863311827299413, "calibration/coverage@10%": 0.2082061827299413, "calibration/coverage@15%": 0.24805222602739727, "calibration/coverage@20%": 0.36997767857142855, "calibration/coverage@25%": 0.49003791585127204, "calibration/coverage@30%": 0.6072965080724071, "calibration/coverage@5%": 0.1652374327299413, "calibration/ece": 0.13711708315742724, "calibration/mean_confidence": 0.48495450226497266, "calibration/prompt_uniqueness": 0.8678187089457596, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 782.2, "completions/max_terminated_length": 617.6, "completions/mean_length": 185.22158203125, "completions/mean_terminated_length": 185.09028015136718, "completions/min_length": 81.4, "completions/min_terminated_length": 81.4, "epoch": 0.56, "grad_norm": 0.0008502820273861289, "learning_rate": 1e-06, "loss": 0.0008, "num_tokens": 585106588.0, "reward": 0.9321273326873779, "reward_std": 0.08039158433675767, "rewards/accuracy_reward": 0.53154296875, "rewards/brier_reward": 0.8001036405563354, "rewards/confidence_uniqueness_reward": 0.9595265865325928, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.002705985214561224, "rewards/frontier_coverage_0": 0.11961866915225983, "rewards/frontier_coverage_1": 0.11961866915225983, "rewards/frontier_coverage_10": 0.11961866915225983, "rewards/frontier_coverage_15": 0.11961866915225983, "rewards/frontier_coverage_20": 0.1191520243883133, "rewards/frontier_coverage_25": 0.08837539106607437, "rewards/frontier_coverage_5": 0.11961866915225983, "rewards/frontier_ece_reward": 0.006058618426322937, "rewards/frontier_entropy_batch_reward": -0.20200627744197847, "signal/accuracy_reward/centered_abs_mean": 0.082733154296875, "signal/accuracy_reward/group_bin_occupancy": 0.168359375, "signal/accuracy_reward/group_std_mean": 0.11375210285186768, "signal/accuracy_reward/group_zero_std_frac": 0.653125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0413665771484375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0413665771484375, "signal/advantage_abs_mean": 0.06228437945246697, "signal/advantage_pre_scale_abs_mean": 0.06228437945246697, "signal/advantage_pre_scale_std": 0.09934655725955963, "signal/advantage_std": 0.09934655725955963, "signal/brier_reward/centered_abs_mean": 0.12185298353433609, "signal/brier_reward/group_bin_occupancy": 0.84296875, "signal/brier_reward/group_std_mean": 0.15725724995136262, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012185298651456834, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012185298651456834, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.01168802659958601, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.93515625, "signal/confidence_uniqueness_reward/group_std_mean": 0.014986979961395263, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0011688026832416653, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0011688026832416653, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_bin_occupancy": 0.125390625, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.0027294772677123546, "signal/frontier_aurc_reward/group_bin_occupancy": 0.692578125, "signal/frontier_aurc_reward/group_std_mean": 0.004910151939839125, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.411846555536613e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.411846555536613e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.16406928300857543, "signal/frontier_coverage_0/group_bin_occupancy": 0.869140625, "signal/frontier_coverage_0/group_std_mean": 0.2110010415315628, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0020508660934865476, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0020508660934865476, "signal/frontier_coverage_1/centered_abs_mean": 0.16406928300857543, "signal/frontier_coverage_1/group_bin_occupancy": 0.869140625, "signal/frontier_coverage_1/group_std_mean": 0.2110010415315628, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0020508660934865476, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020508660934865476, "signal/frontier_coverage_10/centered_abs_mean": 0.16406928300857543, "signal/frontier_coverage_10/group_bin_occupancy": 0.869140625, "signal/frontier_coverage_10/group_std_mean": 0.2110010415315628, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0020508660934865476, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020508660934865476, "signal/frontier_coverage_15/centered_abs_mean": 0.16406928300857543, "signal/frontier_coverage_15/group_bin_occupancy": 0.869140625, "signal/frontier_coverage_15/group_std_mean": 0.2110010415315628, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0020508660934865476, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0020508660934865476, "signal/frontier_coverage_20/centered_abs_mean": 0.16182146072387696, "signal/frontier_coverage_20/group_bin_occupancy": 0.869140625, "signal/frontier_coverage_20/group_std_mean": 0.2081581711769104, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0020227682311087848, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0020227682311087848, "signal/frontier_coverage_25/centered_abs_mean": 0.1146465077996254, "signal/frontier_coverage_25/group_bin_occupancy": 0.871875, "signal/frontier_coverage_25/group_std_mean": 0.14809595346450805, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014330813428387046, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014330813428387046, "signal/frontier_coverage_5/centered_abs_mean": 0.16406928300857543, "signal/frontier_coverage_5/group_bin_occupancy": 0.869140625, "signal/frontier_coverage_5/group_std_mean": 0.2110010415315628, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0020508660934865476, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020508660934865476, "signal/frontier_ece_reward/centered_abs_mean": 0.009023293852806091, "signal/frontier_ece_reward/group_bin_occupancy": 0.87109375, "signal/frontier_ece_reward/group_std_mean": 0.012150265648961068, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009023294202052057, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009023294202052057, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2724481761455536, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.734765625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3477316856384277, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.027244817838072775, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027244817838072775, "step": 175 }, { "calibration/aurc": 0.309636001445996, "calibration/batch_distribution_entropy": 0.9839093996134368, "calibration/batch_entropy_100bins": 0.9625499427091299, "calibration/batch_entropy_10bins": 0.9839093996134368, "calibration/batch_entropy_50bins": 0.972969015575497, "calibration/batch_uniqueness": 0.960882568359375, "calibration/buffer_distribution_entropy": 0.998281991860735, "calibration/buffer_entropy_100bins": 0.9894724328721299, "calibration/buffer_entropy_10bins": 0.998281991860735, "calibration/buffer_entropy_50bins": 0.9944521850113789, "calibration/confidence_entropy": 0.49251360413414175, "calibration/coverage@0%": 0.010546875, "calibration/coverage@1%": 0.010546875, "calibration/coverage@10%": 0.137890625, "calibration/coverage@15%": 0.208984375, "calibration/coverage@20%": 0.284765625, "calibration/coverage@25%": 0.343359375, "calibration/coverage@30%": 0.46015625, "calibration/coverage@5%": 0.0421875, "calibration/ece": 0.08670626321827299, "calibration/mean_confidence": 0.489370553214073, "calibration/prompt_uniqueness": 0.857275390625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 735.4, "completions/max_terminated_length": 592.0, "completions/mean_length": 181.85361328125, "completions/mean_terminated_length": 181.58960876464843, "completions/min_length": 81.6, "completions/min_terminated_length": 81.6, "epoch": 0.576, "grad_norm": 0.0010482225334271789, "learning_rate": 1e-06, "loss": 0.0008, "num_tokens": 602155393.0, "reward": 0.9271166443824768, "reward_std": 0.07998622953891754, "rewards/accuracy_reward": 0.52412109375, "rewards/brier_reward": 0.7888967275619507, "rewards/confidence_uniqueness_reward": 0.9595050811767578, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.003116795467212796, "rewards/frontier_coverage_0": 0.1158630833029747, "rewards/frontier_coverage_1": 0.1158630833029747, "rewards/frontier_coverage_10": 0.1158630833029747, "rewards/frontier_coverage_15": 0.1158630833029747, "rewards/frontier_coverage_20": 0.11500565633177758, "rewards/frontier_coverage_25": 0.08373434320092202, "rewards/frontier_coverage_5": 0.1158630833029747, "rewards/frontier_ece_reward": 0.005350236594676971, "rewards/frontier_entropy_batch_reward": -0.1985933691263199, "signal/accuracy_reward/centered_abs_mean": 0.077752685546875, "signal/accuracy_reward/group_bin_occupancy": 0.166796875, "signal/accuracy_reward/group_std_mean": 0.1094050019979477, "signal/accuracy_reward/group_zero_std_frac": 0.665625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0388763427734375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0388763427734375, "signal/advantage_abs_mean": 0.06033368557691574, "signal/advantage_pre_scale_abs_mean": 0.06033368557691574, "signal/advantage_pre_scale_std": 0.09716939330101013, "signal/advantage_std": 0.09716939330101013, "signal/brier_reward/centered_abs_mean": 0.12069027125835419, "signal/brier_reward/group_bin_occupancy": 0.85546875, "signal/brier_reward/group_std_mean": 0.1551447778940201, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012069026939570904, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012069026939570904, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.01292349398136139, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.93515625, "signal/confidence_uniqueness_reward/group_std_mean": 0.016793293692171574, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001292349398136139, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001292349398136139, "signal/format_reward/centered_abs_mean": 0.000555419921875, "signal/format_reward/group_bin_occupancy": 0.12578125, "signal/format_reward/group_std_mean": 0.0013209730386734009, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002777099609375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002777099609375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0028868647757917644, "signal/frontier_aurc_reward/group_bin_occupancy": 0.7015625, "signal/frontier_aurc_reward/group_std_mean": 0.005015233065932989, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.608580991567578e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.608580991567578e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.16046448349952697, "signal/frontier_coverage_0/group_bin_occupancy": 0.8828125, "signal/frontier_coverage_0/group_std_mean": 0.20543249547481537, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002005806053057313, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002005806053057313, "signal/frontier_coverage_1/centered_abs_mean": 0.16046448349952697, "signal/frontier_coverage_1/group_bin_occupancy": 0.8828125, "signal/frontier_coverage_1/group_std_mean": 0.20543249547481537, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002005806053057313, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002005806053057313, "signal/frontier_coverage_10/centered_abs_mean": 0.16046448349952697, "signal/frontier_coverage_10/group_bin_occupancy": 0.8828125, "signal/frontier_coverage_10/group_std_mean": 0.20543249547481537, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002005806053057313, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002005806053057313, "signal/frontier_coverage_15/centered_abs_mean": 0.16046448349952697, "signal/frontier_coverage_15/group_bin_occupancy": 0.8828125, "signal/frontier_coverage_15/group_std_mean": 0.20543249547481537, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002005806053057313, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002005806053057313, "signal/frontier_coverage_20/centered_abs_mean": 0.15846198201179504, "signal/frontier_coverage_20/group_bin_occupancy": 0.881640625, "signal/frontier_coverage_20/group_std_mean": 0.20285292565822602, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0019807748030871153, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0019807748030871153, "signal/frontier_coverage_25/centered_abs_mean": 0.10855960100889206, "signal/frontier_coverage_25/group_bin_occupancy": 0.87578125, "signal/frontier_coverage_25/group_std_mean": 0.13992275893688202, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001356995035894215, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001356995035894215, "signal/frontier_coverage_5/centered_abs_mean": 0.16046448349952697, "signal/frontier_coverage_5/group_bin_occupancy": 0.8828125, "signal/frontier_coverage_5/group_std_mean": 0.20543249547481537, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002005806053057313, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002005806053057313, "signal/frontier_ece_reward/centered_abs_mean": 0.009020330384373665, "signal/frontier_ece_reward/group_bin_occupancy": 0.846875, "signal/frontier_ece_reward/group_std_mean": 0.012636875361204147, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009020330267958343, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009020330267958343, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2665239542722702, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.73046875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.34042556285858155, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026652396842837333, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026652396842837333, "step": 180 }, { "calibration/aurc": 0.29726541114295457, "calibration/batch_distribution_entropy": 0.9834822954947106, "calibration/batch_entropy_100bins": 0.9608169793074197, "calibration/batch_entropy_10bins": 0.9834822954947106, "calibration/batch_entropy_50bins": 0.9750636920076771, "calibration/batch_uniqueness": 0.9607329022889346, "calibration/buffer_distribution_entropy": 0.9983789319326808, "calibration/buffer_entropy_100bins": 0.9895114798338899, "calibration/buffer_entropy_10bins": 0.9983789319326808, "calibration/buffer_entropy_50bins": 0.9945117700392121, "calibration/confidence_entropy": 0.48444024753987314, "calibration/coverage@0%": 0.01800085616438356, "calibration/coverage@1%": 0.01800085616438356, "calibration/coverage@10%": 0.16278666218199608, "calibration/coverage@15%": 0.30757323263209396, "calibration/coverage@20%": 0.4346991193737769, "calibration/coverage@25%": 0.5273888515166341, "calibration/coverage@30%": 0.5982035836594912, "calibration/coverage@5%": 0.08451565557729941, "calibration/ece": 0.12100732825692273, "calibration/mean_confidence": 0.49416566136722384, "calibration/prompt_uniqueness": 0.8536409725383713, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 1118.0, "completions/max_terminated_length": 492.2, "completions/mean_length": 176.89560546875, "completions/mean_terminated_length": 176.4981475830078, "completions/min_length": 79.8, "completions/min_terminated_length": 79.8, "epoch": 0.592, "grad_norm": 0.0010737936245277524, "learning_rate": 1e-06, "loss": 0.0003, "num_tokens": 619134516.0, "reward": 0.9240444660186767, "reward_std": 0.0810801163315773, "rewards/accuracy_reward": 0.5212890625, "rewards/brier_reward": 0.790893018245697, "rewards/confidence_uniqueness_reward": 0.9601579666137695, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.00303261773660779, "rewards/frontier_coverage_0": 0.12167765200138092, "rewards/frontier_coverage_1": 0.12167765200138092, "rewards/frontier_coverage_10": 0.12167765200138092, "rewards/frontier_coverage_15": 0.12167765200138092, "rewards/frontier_coverage_20": 0.1199147269129753, "rewards/frontier_coverage_25": 0.08222576975822449, "rewards/frontier_coverage_5": 0.12167765200138092, "rewards/frontier_ece_reward": 0.0055978668853640555, "rewards/frontier_entropy_batch_reward": -0.22212174534797668, "signal/accuracy_reward/centered_abs_mean": 0.08709716796875, "signal/accuracy_reward/group_bin_occupancy": 0.168359375, "signal/accuracy_reward/group_std_mean": 0.11671981066465378, "signal/accuracy_reward/group_zero_std_frac": 0.653125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.043548583984375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.043548583984375, "signal/advantage_abs_mean": 0.0631883479654789, "signal/advantage_pre_scale_abs_mean": 0.0631883479654789, "signal/advantage_pre_scale_std": 0.09987544417381286, "signal/advantage_std": 0.09987544417381286, "signal/brier_reward/centered_abs_mean": 0.11597198843955994, "signal/brier_reward/group_bin_occupancy": 0.838671875, "signal/brier_reward/group_std_mean": 0.149802365899086, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011597198992967605, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011597198992967605, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012930301018059254, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.911328125, "signal/confidence_uniqueness_reward/group_std_mean": 0.017115654610097408, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012930301018059254, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012930301018059254, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_bin_occupancy": 0.126171875, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.002846223535016179, "signal/frontier_aurc_reward/group_bin_occupancy": 0.709765625, "signal/frontier_aurc_reward/group_std_mean": 0.004566754633560777, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.557779564289376e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.557779564289376e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.1627124637365341, "signal/frontier_coverage_0/group_bin_occupancy": 0.86953125, "signal/frontier_coverage_0/group_std_mean": 0.20693700313568114, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0020339058246463537, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0020339058246463537, "signal/frontier_coverage_1/centered_abs_mean": 0.1627124637365341, "signal/frontier_coverage_1/group_bin_occupancy": 0.86953125, "signal/frontier_coverage_1/group_std_mean": 0.20693700313568114, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0020339058246463537, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020339058246463537, "signal/frontier_coverage_10/centered_abs_mean": 0.1627124637365341, "signal/frontier_coverage_10/group_bin_occupancy": 0.86953125, "signal/frontier_coverage_10/group_std_mean": 0.20693700313568114, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0020339058246463537, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020339058246463537, "signal/frontier_coverage_15/centered_abs_mean": 0.1627124637365341, "signal/frontier_coverage_15/group_bin_occupancy": 0.86953125, "signal/frontier_coverage_15/group_std_mean": 0.20693700313568114, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0020339058246463537, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0020339058246463537, "signal/frontier_coverage_20/centered_abs_mean": 0.15905381739139557, "signal/frontier_coverage_20/group_bin_occupancy": 0.867578125, "signal/frontier_coverage_20/group_std_mean": 0.20235534608364106, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001988172740675509, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001988172740675509, "signal/frontier_coverage_25/centered_abs_mean": 0.10259814411401749, "signal/frontier_coverage_25/group_bin_occupancy": 0.857421875, "signal/frontier_coverage_25/group_std_mean": 0.1315797194838524, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012824768433347344, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012824768433347344, "signal/frontier_coverage_5/centered_abs_mean": 0.1627124637365341, "signal/frontier_coverage_5/group_bin_occupancy": 0.86953125, "signal/frontier_coverage_5/group_std_mean": 0.20693700313568114, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0020339058246463537, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020339058246463537, "signal/frontier_ece_reward/centered_abs_mean": 0.009119224734604359, "signal/frontier_ece_reward/group_bin_occupancy": 0.8375, "signal/frontier_ece_reward/group_std_mean": 0.012649010121822356, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009119224967435002, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009119224967435002, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2906370997428894, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.723828125, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3651686549186707, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.029063709452748297, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.029063709452748297, "step": 185 }, { "calibration/aurc": 0.23078012934450687, "calibration/batch_distribution_entropy": 0.972169633516365, "calibration/batch_entropy_100bins": 0.9577764655227113, "calibration/batch_entropy_10bins": 0.972169633516365, "calibration/batch_entropy_50bins": 0.9698115325872496, "calibration/batch_uniqueness": 0.958111572265625, "calibration/buffer_distribution_entropy": 0.9983615562842496, "calibration/buffer_entropy_100bins": 0.989554537741407, "calibration/buffer_entropy_10bins": 0.9983615562842496, "calibration/buffer_entropy_50bins": 0.9944703301690406, "calibration/confidence_entropy": 0.4702111359575749, "calibration/coverage@0%": 0.074609375, "calibration/coverage@1%": 0.0796875, "calibration/coverage@10%": 0.249609375, "calibration/coverage@15%": 0.390234375, "calibration/coverage@20%": 0.530078125, "calibration/coverage@25%": 0.60859375, "calibration/coverage@30%": 0.70234375, "calibration/coverage@5%": 0.15078125, "calibration/ece": 0.11253724402326422, "calibration/mean_confidence": 0.4741178147270788, "calibration/prompt_uniqueness": 0.855712890625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 956.2, "completions/max_terminated_length": 574.2, "completions/mean_length": 183.07431640625, "completions/mean_terminated_length": 182.80919494628907, "completions/min_length": 82.8, "completions/min_terminated_length": 82.8, "epoch": 0.608, "grad_norm": 0.0008162545855157077, "learning_rate": 1e-06, "loss": 0.001, "num_tokens": 636008685.0, "reward": 0.932918655872345, "reward_std": 0.07714778482913971, "rewards/accuracy_reward": 0.5296875, "rewards/brier_reward": 0.809378182888031, "rewards/confidence_uniqueness_reward": 0.959358549118042, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.002335884002968669, "rewards/frontier_coverage_0": 0.13527322113513945, "rewards/frontier_coverage_1": 0.13527322113513945, "rewards/frontier_coverage_10": 0.13527322113513945, "rewards/frontier_coverage_15": 0.13527322113513945, "rewards/frontier_coverage_20": 0.12897036075592042, "rewards/frontier_coverage_25": 0.08629466593265533, "rewards/frontier_coverage_5": 0.13527322113513945, "rewards/frontier_ece_reward": 0.006201074831187725, "rewards/frontier_entropy_batch_reward": -0.20437394380569457, "signal/accuracy_reward/centered_abs_mean": 0.08509521484375, "signal/accuracy_reward/group_bin_occupancy": 0.166796875, "signal/accuracy_reward/group_std_mean": 0.1144148737192154, "signal/accuracy_reward/group_zero_std_frac": 0.665625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.042547607421875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.042547607421875, "signal/advantage_abs_mean": 0.05944623276591301, "signal/advantage_pre_scale_abs_mean": 0.05944623276591301, "signal/advantage_pre_scale_std": 0.09432210624217988, "signal/advantage_std": 0.09432210624217988, "signal/brier_reward/centered_abs_mean": 0.11613385826349258, "signal/brier_reward/group_bin_occupancy": 0.843359375, "signal/brier_reward/group_std_mean": 0.14919577836990355, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011613386496901513, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011613386496901513, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012426980212330819, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.9140625, "signal/confidence_uniqueness_reward/group_std_mean": 0.0162442235276103, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012426980305463077, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012426980305463077, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_bin_occupancy": 0.12578125, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.002181270159780979, "signal/frontier_aurc_reward/group_bin_occupancy": 0.734765625, "signal/frontier_aurc_reward/group_std_mean": 0.003462765412405133, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.726587808865588e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.726587808865588e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.17015551626682282, "signal/frontier_coverage_0/group_bin_occupancy": 0.86015625, "signal/frontier_coverage_0/group_std_mean": 0.2166207551956177, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002126943925395608, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002126943925395608, "signal/frontier_coverage_1/centered_abs_mean": 0.17015551626682282, "signal/frontier_coverage_1/group_bin_occupancy": 0.86015625, "signal/frontier_coverage_1/group_std_mean": 0.2166207551956177, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002126943925395608, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002126943925395608, "signal/frontier_coverage_10/centered_abs_mean": 0.17015551626682282, "signal/frontier_coverage_10/group_bin_occupancy": 0.86015625, "signal/frontier_coverage_10/group_std_mean": 0.2166207551956177, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002126943925395608, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002126943925395608, "signal/frontier_coverage_15/centered_abs_mean": 0.17015551626682282, "signal/frontier_coverage_15/group_bin_occupancy": 0.86015625, "signal/frontier_coverage_15/group_std_mean": 0.2166207551956177, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002126943925395608, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002126943925395608, "signal/frontier_coverage_20/centered_abs_mean": 0.16215289533138275, "signal/frontier_coverage_20/group_bin_occupancy": 0.8578125, "signal/frontier_coverage_20/group_std_mean": 0.20657850205898284, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0020269112894311546, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0020269112894311546, "signal/frontier_coverage_25/centered_abs_mean": 0.09889246076345444, "signal/frontier_coverage_25/group_bin_occupancy": 0.878515625, "signal/frontier_coverage_25/group_std_mean": 0.1259763240814209, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012361557688564061, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012361557688564061, "signal/frontier_coverage_5/centered_abs_mean": 0.17015551626682282, "signal/frontier_coverage_5/group_bin_occupancy": 0.86015625, "signal/frontier_coverage_5/group_std_mean": 0.2166207551956177, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002126943925395608, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002126943925395608, "signal/frontier_ece_reward/centered_abs_mean": 0.008855049218982457, "signal/frontier_ece_reward/group_bin_occupancy": 0.825, "signal/frontier_ece_reward/group_std_mean": 0.012453357130289078, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0008855049381963909, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0008855049381963909, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2709381639957428, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7328125, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3473371982574463, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02709381692111492, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02709381692111492, "step": 190 }, { "calibration/aurc": 0.24190303576000538, "calibration/batch_distribution_entropy": 0.9851080109571406, "calibration/batch_entropy_100bins": 0.9660194285586039, "calibration/batch_entropy_10bins": 0.9851080109571406, "calibration/batch_entropy_50bins": 0.9779150487479604, "calibration/batch_uniqueness": 0.9610351488052915, "calibration/buffer_distribution_entropy": 0.9984440017454219, "calibration/buffer_entropy_100bins": 0.98965536789939, "calibration/buffer_entropy_10bins": 0.9984440017454219, "calibration/buffer_entropy_50bins": 0.9944964147279853, "calibration/confidence_entropy": 0.4965098505390208, "calibration/coverage@0%": 0.01917196673189824, "calibration/coverage@1%": 0.01917196673189824, "calibration/coverage@10%": 0.12628424657534248, "calibration/coverage@15%": 0.30368685787671235, "calibration/coverage@20%": 0.44866530088062617, "calibration/coverage@25%": 0.5823018590998043, "calibration/coverage@30%": 0.6850691046966733, "calibration/coverage@5%": 0.048483365949119374, "calibration/ece": 0.08340996381299517, "calibration/mean_confidence": 0.516178190966678, "calibration/prompt_uniqueness": 0.8672639686036681, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 779.6, "completions/max_terminated_length": 570.2, "completions/mean_length": 184.97431640625, "completions/mean_terminated_length": 184.84255981445312, "completions/min_length": 85.0, "completions/min_terminated_length": 85.0, "epoch": 0.624, "grad_norm": 0.0011064645368605852, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 653246726.0, "reward": 0.9357229709625244, "reward_std": 0.08368170112371445, "rewards/accuracy_reward": 0.5328125, "rewards/brier_reward": 0.8055280208587646, "rewards/confidence_uniqueness_reward": 0.9595144271850586, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.0028843384236097334, "rewards/frontier_coverage_0": 0.12474274337291717, "rewards/frontier_coverage_1": 0.12474274337291717, "rewards/frontier_coverage_10": 0.12474274337291717, "rewards/frontier_coverage_15": 0.12449503540992737, "rewards/frontier_coverage_20": 0.11553706079721451, "rewards/frontier_coverage_25": 0.07678574174642563, "rewards/frontier_coverage_5": 0.12474274337291717, "rewards/frontier_ece_reward": 0.005106198182329535, "rewards/frontier_entropy_batch_reward": -0.1781061351299286, "signal/accuracy_reward/centered_abs_mean": 0.09542236328125, "signal/accuracy_reward/group_bin_occupancy": 0.172265625, "signal/accuracy_reward/group_std_mean": 0.127110655605793, "signal/accuracy_reward/group_zero_std_frac": 0.621875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.047711181640625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.047711181640625, "signal/advantage_abs_mean": 0.06504265516996384, "signal/advantage_pre_scale_abs_mean": 0.06504265516996384, "signal/advantage_pre_scale_std": 0.10494562834501267, "signal/advantage_std": 0.10494562834501267, "signal/brier_reward/centered_abs_mean": 0.11675633937120437, "signal/brier_reward/group_bin_occupancy": 0.855859375, "signal/brier_reward/group_std_mean": 0.1501062899827957, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011675634235143662, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011675634235143662, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.011868251860141754, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.928125, "signal/confidence_uniqueness_reward/group_std_mean": 0.015202015824615955, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0011868252186104655, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0011868252186104655, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_bin_occupancy": 0.125390625, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.0027540235314518213, "signal/frontier_aurc_reward/group_bin_occupancy": 0.70546875, "signal/frontier_aurc_reward/group_std_mean": 0.004497009515762329, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.442529414314777e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.442529414314777e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.15991105139255524, "signal/frontier_coverage_0/group_bin_occupancy": 0.88125, "signal/frontier_coverage_0/group_std_mean": 0.2048025608062744, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0019988882122561336, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019988882122561336, "signal/frontier_coverage_1/centered_abs_mean": 0.15991105139255524, "signal/frontier_coverage_1/group_bin_occupancy": 0.88125, "signal/frontier_coverage_1/group_std_mean": 0.2048025608062744, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019988882122561336, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019988882122561336, "signal/frontier_coverage_10/centered_abs_mean": 0.15991105139255524, "signal/frontier_coverage_10/group_bin_occupancy": 0.88125, "signal/frontier_coverage_10/group_std_mean": 0.2048025608062744, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019988882122561336, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019988882122561336, "signal/frontier_coverage_15/centered_abs_mean": 0.15969133675098418, "signal/frontier_coverage_15/group_bin_occupancy": 0.881640625, "signal/frontier_coverage_15/group_std_mean": 0.20454807877540587, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001996141788549721, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001996141788549721, "signal/frontier_coverage_20/centered_abs_mean": 0.1426139533519745, "signal/frontier_coverage_20/group_bin_occupancy": 0.876171875, "signal/frontier_coverage_20/group_std_mean": 0.18301699459552764, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00178267452865839, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00178267452865839, "signal/frontier_coverage_25/centered_abs_mean": 0.08272561132907867, "signal/frontier_coverage_25/group_bin_occupancy": 0.899609375, "signal/frontier_coverage_25/group_std_mean": 0.10734816044569015, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001034070155583322, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001034070155583322, "signal/frontier_coverage_5/centered_abs_mean": 0.15991105139255524, "signal/frontier_coverage_5/group_bin_occupancy": 0.88125, "signal/frontier_coverage_5/group_std_mean": 0.2048025608062744, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0019988882122561336, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019988882122561336, "signal/frontier_ece_reward/centered_abs_mean": 0.00812565665692091, "signal/frontier_ece_reward/group_bin_occupancy": 0.83671875, "signal/frontier_ece_reward/group_std_mean": 0.011584336683154107, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0008125656750053167, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0008125656750053167, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2498374253511429, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.737109375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3267929255962372, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.024983742833137514, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.024983742833137514, "step": 195 }, { "calibration/aurc": 0.26512359032820576, "calibration/batch_distribution_entropy": 0.9819760604956709, "calibration/batch_entropy_100bins": 0.961525383497633, "calibration/batch_entropy_10bins": 0.9819760604956709, "calibration/batch_entropy_50bins": 0.9735273980284143, "calibration/batch_uniqueness": 0.9592022574135403, "calibration/buffer_distribution_entropy": 0.9985324674567476, "calibration/buffer_entropy_100bins": 0.9898235209789318, "calibration/buffer_entropy_10bins": 0.9985324674567476, "calibration/buffer_entropy_50bins": 0.9945705588322357, "calibration/confidence_entropy": 0.5043373180268479, "calibration/coverage@0%": 0.06848550636007827, "calibration/coverage@1%": 0.07318217954990215, "calibration/coverage@10%": 0.3003011863992172, "calibration/coverage@15%": 0.35930925880626224, "calibration/coverage@20%": 0.3980078889432485, "calibration/coverage@25%": 0.4449081152152642, "calibration/coverage@30%": 0.6000076443248532, "calibration/coverage@5%": 0.23814135885518595, "calibration/ece": 0.16721492077336086, "calibration/mean_confidence": 0.5371362845790759, "calibration/prompt_uniqueness": 0.8633109228668054, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0009765625, "completions/max_length": 1131.8, "completions/max_terminated_length": 729.0, "completions/mean_length": 188.35107421875, "completions/mean_terminated_length": 187.03569641113282, "completions/min_length": 82.4, "completions/min_terminated_length": 82.4, "epoch": 0.64, "grad_norm": 0.0009473967947997153, "learning_rate": 1e-06, "loss": 0.002, "num_tokens": 670518129.0, "reward": 0.9468509316444397, "reward_std": 0.07844078540802002, "rewards/accuracy_reward": 0.5681640625, "rewards/brier_reward": 0.8030801296234131, "rewards/confidence_uniqueness_reward": 0.957956874370575, "rewards/format_reward": 0.99892578125, "rewards/frontier_aurc_reward": -0.0027081962209194897, "rewards/frontier_coverage_0": 0.09533136114478111, "rewards/frontier_coverage_1": 0.09533136114478111, "rewards/frontier_coverage_10": 0.09533136114478111, "rewards/frontier_coverage_15": 0.0953597754240036, "rewards/frontier_coverage_20": 0.08704339265823365, "rewards/frontier_coverage_25": 0.05847667083144188, "rewards/frontier_coverage_5": 0.09533136114478111, "rewards/frontier_ece_reward": 0.005162352602928877, "rewards/frontier_entropy_batch_reward": -0.21057653427124023, "signal/accuracy_reward/centered_abs_mean": 0.07152099609375, "signal/accuracy_reward/group_bin_occupancy": 0.162109375, "signal/accuracy_reward/group_std_mean": 0.09872582405805588, "signal/accuracy_reward/group_zero_std_frac": 0.703125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.035760498046875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.035760498046875, "signal/advantage_abs_mean": 0.0600838340818882, "signal/advantage_pre_scale_abs_mean": 0.0600838340818882, "signal/advantage_pre_scale_std": 0.09821470826864243, "signal/advantage_std": 0.09821470826864243, "signal/brier_reward/centered_abs_mean": 0.11101796627044677, "signal/brier_reward/group_bin_occupancy": 0.848828125, "signal/brier_reward/group_std_mean": 0.14420543015003204, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011101796850562095, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011101796850562095, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.013335288688540458, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.93515625, "signal/confidence_uniqueness_reward/group_std_mean": 0.01753148380666971, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013335288735106588, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013335288735106588, "signal/format_reward/centered_abs_mean": 0.001739501953125, "signal/format_reward/group_bin_occupancy": 0.1265625, "signal/format_reward/group_std_mean": 0.0030320982448756697, "signal/format_reward/group_zero_std_frac": 0.9875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0008697509765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0008697509765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.0027640830259770153, "signal/frontier_aurc_reward/group_bin_occupancy": 0.705859375, "signal/frontier_aurc_reward/group_std_mean": 0.004512441391125321, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.455103724263609e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.455103724263609e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.139675572514534, "signal/frontier_coverage_0/group_bin_occupancy": 0.875, "signal/frontier_coverage_0/group_std_mean": 0.18167279958724974, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0017459447728469968, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0017459447728469968, "signal/frontier_coverage_1/centered_abs_mean": 0.139675572514534, "signal/frontier_coverage_1/group_bin_occupancy": 0.875, "signal/frontier_coverage_1/group_std_mean": 0.18167279958724974, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0017459447728469968, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0017459447728469968, "signal/frontier_coverage_10/centered_abs_mean": 0.139675572514534, "signal/frontier_coverage_10/group_bin_occupancy": 0.875, "signal/frontier_coverage_10/group_std_mean": 0.18167279958724974, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0017459447728469968, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017459447728469968, "signal/frontier_coverage_15/centered_abs_mean": 0.1394558221101761, "signal/frontier_coverage_15/group_bin_occupancy": 0.875390625, "signal/frontier_coverage_15/group_std_mean": 0.18138521909713745, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0017431978834792972, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0017431978834792972, "signal/frontier_coverage_20/centered_abs_mean": 0.1208455815911293, "signal/frontier_coverage_20/group_bin_occupancy": 0.865234375, "signal/frontier_coverage_20/group_std_mean": 0.15725071132183074, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0015105698024854065, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015105698024854065, "signal/frontier_coverage_25/centered_abs_mean": 0.07018533274531365, "signal/frontier_coverage_25/group_bin_occupancy": 0.8984375, "signal/frontier_coverage_25/group_std_mean": 0.09095044732093811, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0008773167035542428, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008773167035542428, "signal/frontier_coverage_5/centered_abs_mean": 0.139675572514534, "signal/frontier_coverage_5/group_bin_occupancy": 0.875, "signal/frontier_coverage_5/group_std_mean": 0.18167279958724974, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0017459447728469968, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0017459447728469968, "signal/frontier_ece_reward/centered_abs_mean": 0.008315538614988327, "signal/frontier_ece_reward/group_bin_occupancy": 0.822265625, "signal/frontier_ece_reward/group_std_mean": 0.01194094903767109, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0008315538754686714, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0008315538754686714, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27706546187400816, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.72578125, "signal/frontier_entropy_batch_reward/group_std_mean": 0.35140617489814757, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02770654745399952, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02770654745399952, "step": 200 }, { "epoch": 0.64, "eval_calibration/aurc": 0.4566159075334175, "eval_calibration/batch_distribution_entropy": 0.9163126557855614, "eval_calibration/batch_entropy_100bins": 0.7178744913412122, "eval_calibration/batch_entropy_10bins": 0.9163126557855614, "eval_calibration/batch_entropy_50bins": 0.798684377132812, "eval_calibration/batch_uniqueness": 0.904296875, "eval_calibration/buffer_distribution_entropy": 0.9985671709970653, "eval_calibration/buffer_entropy_100bins": 0.9900184255261969, "eval_calibration/buffer_entropy_10bins": 0.9985671709970653, "eval_calibration/buffer_entropy_50bins": 0.9946421013154033, "eval_calibration/confidence_entropy": 0.5005098902291641, "eval_calibration/coverage@0%": 0.0390625, "eval_calibration/coverage@1%": 0.0390625, "eval_calibration/coverage@10%": 0.0390625, "eval_calibration/coverage@15%": 0.09375, "eval_calibration/coverage@20%": 0.1875, "eval_calibration/coverage@25%": 0.2109375, "eval_calibration/coverage@30%": 0.25, "eval_calibration/coverage@5%": 0.0390625, "eval_calibration/ece": 0.19819265669162003, "eval_calibration/mean_confidence": 0.44646902260571164, "eval_calibration/prompt_uniqueness": 0.904296875, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 373.25, "eval_completions/max_terminated_length": 373.25, "eval_completions/mean_length": 191.53293228149414, "eval_completions/mean_terminated_length": 191.53293228149414, "eval_completions/min_length": 95.5, "eval_completions/min_terminated_length": 95.5, "eval_loss": 0.0, "eval_num_tokens": 670518129.0, "eval_reward": 0.799683153629303, "eval_reward_std": 0.22493423148989677, "eval_rewards/accuracy_reward": 0.4296875, "eval_rewards/brier_reward": 0.7988216429948807, "eval_rewards/confidence_uniqueness_reward": 0.89794921875, "eval_rewards/format_reward": 1.0, "eval_rewards/frontier_aurc_reward": -0.0034247017465531826, "eval_rewards/frontier_coverage_0": 0.18689577654004097, "eval_rewards/frontier_coverage_1": 0.18689577654004097, "eval_rewards/frontier_coverage_10": 0.18689577654004097, "eval_rewards/frontier_coverage_15": 0.18632838502526283, "eval_rewards/frontier_coverage_20": 0.1586691550910473, "eval_rewards/frontier_coverage_25": 0.08706778101623058, "eval_rewards/frontier_coverage_5": 0.18689577654004097, "eval_rewards/frontier_ece_reward": 0.004595339996740222, "eval_rewards/frontier_entropy_batch_reward": -1.0, "eval_runtime": 19.8251, "eval_samples_per_second": 25.22, "eval_signal/accuracy_reward/centered_abs_mean": 0.47509765625, "eval_signal/accuracy_reward/group_bin_occupancy": 0.25, "eval_signal/accuracy_reward/group_std_mean": 0.49481892585754395, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.237548828125, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.237548828125, "eval_signal/advantage_abs_mean": 0.21163957193493843, "eval_signal/advantage_pre_scale_abs_mean": 0.21163957193493843, "eval_signal/advantage_pre_scale_std": 0.2224200740456581, "eval_signal/advantage_std": 0.2224200740456581, "eval_signal/brier_reward/centered_abs_mean": 0.18079102784395218, "eval_signal/brier_reward/group_bin_occupancy": 0.8828125, "eval_signal/brier_reward/group_std_mean": 0.2304544784128666, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018079102504998446, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.018079102504998446, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0389862060546875, "eval_signal/confidence_uniqueness_reward/group_bin_occupancy": 0.40625, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.046543585136532784, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003898620721884072, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003898620721884072, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_bin_occupancy": 0.125, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.004244803451001644, "eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.6015625, "eval_signal/frontier_aurc_reward/group_std_mean": 0.008210767526179552, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.306004641170148e-05, "eval_signal/frontier_aurc_reward/weight": 0.012500000186264515, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.306004641170148e-05, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.36475419253110886, "eval_signal/frontier_coverage_0/group_bin_occupancy": 0.9765625, "eval_signal/frontier_coverage_0/group_std_mean": 0.4384455382823944, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.004559427383355796, "eval_signal/frontier_coverage_0/weight": 0.012500000186264515, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004559427383355796, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.36475419253110886, "eval_signal/frontier_coverage_1/group_bin_occupancy": 0.9765625, "eval_signal/frontier_coverage_1/group_std_mean": 0.4384455382823944, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004559427383355796, "eval_signal/frontier_coverage_1/weight": 0.012500000186264515, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004559427383355796, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.36475419253110886, "eval_signal/frontier_coverage_10/group_bin_occupancy": 0.9765625, "eval_signal/frontier_coverage_10/group_std_mean": 0.4384455382823944, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004559427383355796, "eval_signal/frontier_coverage_10/weight": 0.012500000186264515, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004559427383355796, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.3635733351111412, "eval_signal/frontier_coverage_15/group_bin_occupancy": 0.9765625, "eval_signal/frontier_coverage_15/group_std_mean": 0.43705061078071594, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004544666619040072, "eval_signal/frontier_coverage_15/weight": 0.012500000186264515, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004544666619040072, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.3101271614432335, "eval_signal/frontier_coverage_20/group_bin_occupancy": 0.984375, "eval_signal/frontier_coverage_20/group_std_mean": 0.37565645575523376, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0038765897625125945, "eval_signal/frontier_coverage_20/weight": 0.012500000186264515, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0038765897625125945, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.15158939361572266, "eval_signal/frontier_coverage_25/group_bin_occupancy": 0.9609375, "eval_signal/frontier_coverage_25/group_std_mean": 0.19350523501634598, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0018948675133287907, "eval_signal/frontier_coverage_25/weight": 0.012500000186264515, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0018948675133287907, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.36475419253110886, "eval_signal/frontier_coverage_5/group_bin_occupancy": 0.9765625, "eval_signal/frontier_coverage_5/group_std_mean": 0.4384455382823944, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004559427383355796, "eval_signal/frontier_coverage_5/weight": 0.012500000186264515, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004559427383355796, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.00890616630204022, "eval_signal/frontier_ece_reward/group_bin_occupancy": 0.890625, "eval_signal/frontier_ece_reward/group_std_mean": 0.013190251076593995, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0008906166476663202, "eval_signal/frontier_ece_reward/weight": 0.10000000149011612, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0008906166476663202, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.125, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0, "eval_steps_per_second": 0.202, "step": 200 }, { "calibration/aurc": 0.41371530585067084, "calibration/batch_distribution_entropy": 0.968526477144453, "calibration/batch_entropy_100bins": 0.9606235974216354, "calibration/batch_entropy_10bins": 0.968526477144453, "calibration/batch_entropy_50bins": 0.9681260324091415, "calibration/batch_uniqueness": 0.9544647216796875, "calibration/buffer_distribution_entropy": 0.9986671560208302, "calibration/buffer_entropy_100bins": 0.9902706960336737, "calibration/buffer_entropy_10bins": 0.9986671560208302, "calibration/buffer_entropy_50bins": 0.9947728709798141, "calibration/confidence_entropy": 0.5252373594473655, "calibration/coverage@0%": 0.00546875, "calibration/coverage@1%": 0.00546875, "calibration/coverage@10%": 0.0140625, "calibration/coverage@15%": 0.06171875, "calibration/coverage@20%": 0.1109375, "calibration/coverage@25%": 0.179296875, "calibration/coverage@30%": 0.28984375, "calibration/coverage@5%": 0.00546875, "calibration/ece": 0.10538980298983983, "calibration/mean_confidence": 0.4486731517153184, "calibration/prompt_uniqueness": 0.8537109375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 558.0, "completions/max_terminated_length": 558.0, "completions/mean_length": 188.62841796875, "completions/mean_terminated_length": 188.62841796875, "completions/min_length": 86.2, "completions/min_terminated_length": 86.2, "epoch": 0.656, "grad_norm": 0.0009614454465918243, "learning_rate": 1e-06, "loss": 0.0005, "num_tokens": 687306228.0, "reward": 0.9128621459007263, "reward_std": 0.08489621281623841, "rewards/accuracy_reward": 0.49951171875, "rewards/brier_reward": 0.7791517615318299, "rewards/confidence_uniqueness_reward": 0.9559079051017761, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.00292632021009922, "rewards/frontier_coverage_0": 0.11452654302120209, "rewards/frontier_coverage_1": 0.11452654302120209, "rewards/frontier_coverage_10": 0.11452654302120209, "rewards/frontier_coverage_15": 0.11425123661756516, "rewards/frontier_coverage_20": 0.0982695385813713, "rewards/frontier_coverage_25": 0.05823923796415329, "rewards/frontier_coverage_5": 0.11452654302120209, "rewards/frontier_ece_reward": 0.0032087708823382854, "rewards/frontier_entropy_batch_reward": -0.1974597692489624, "signal/accuracy_reward/centered_abs_mean": 0.094403076171875, "signal/accuracy_reward/group_bin_occupancy": 0.17265625, "signal/accuracy_reward/group_std_mean": 0.12853155434131622, "signal/accuracy_reward/group_zero_std_frac": 0.61875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0472015380859375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0472015380859375, "signal/advantage_abs_mean": 0.0651530534029007, "signal/advantage_pre_scale_abs_mean": 0.0651530534029007, "signal/advantage_pre_scale_std": 0.1036272794008255, "signal/advantage_std": 0.1036272794008255, "signal/brier_reward/centered_abs_mean": 0.12340695858001709, "signal/brier_reward/group_bin_occupancy": 0.872265625, "signal/brier_reward/group_std_mean": 0.15771982073783875, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012340695783495902, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012340695783495902, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012716875597834586, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.9515625, "signal/confidence_uniqueness_reward/group_std_mean": 0.016173630580306055, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012716875644400716, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012716875644400716, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_bin_occupancy": 0.125390625, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.0025363420136272907, "signal/frontier_aurc_reward/group_bin_occupancy": 0.7109375, "signal/frontier_aurc_reward/group_std_mean": 0.004503958486020565, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.170427517034114e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.170427517034114e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.1702498823404312, "signal/frontier_coverage_0/group_bin_occupancy": 0.887109375, "signal/frontier_coverage_0/group_std_mean": 0.2185587167739868, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0021281236317008735, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021281236317008735, "signal/frontier_coverage_1/centered_abs_mean": 0.1702498823404312, "signal/frontier_coverage_1/group_bin_occupancy": 0.887109375, "signal/frontier_coverage_1/group_std_mean": 0.2185587167739868, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0021281236317008735, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021281236317008735, "signal/frontier_coverage_10/centered_abs_mean": 0.1702498823404312, "signal/frontier_coverage_10/group_bin_occupancy": 0.887109375, "signal/frontier_coverage_10/group_std_mean": 0.2185587167739868, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0021281236317008735, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0021281236317008735, "signal/frontier_coverage_15/centered_abs_mean": 0.1695919394493103, "signal/frontier_coverage_15/group_bin_occupancy": 0.88671875, "signal/frontier_coverage_15/group_std_mean": 0.21770275235176087, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0021198994014412164, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0021198994014412164, "signal/frontier_coverage_20/centered_abs_mean": 0.1464843899011612, "signal/frontier_coverage_20/group_bin_occupancy": 0.88125, "signal/frontier_coverage_20/group_std_mean": 0.18815037310123445, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0018310548504814506, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0018310548504814506, "signal/frontier_coverage_25/centered_abs_mean": 0.0794641137123108, "signal/frontier_coverage_25/group_bin_occupancy": 0.9, "signal/frontier_coverage_25/group_std_mean": 0.10283097177743912, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009933014633134007, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009933014633134007, "signal/frontier_coverage_5/centered_abs_mean": 0.1702498823404312, "signal/frontier_coverage_5/group_bin_occupancy": 0.887109375, "signal/frontier_coverage_5/group_std_mean": 0.2185587167739868, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0021281236317008735, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021281236317008735, "signal/frontier_ece_reward/centered_abs_mean": 0.006669469363987446, "signal/frontier_ece_reward/group_bin_occupancy": 0.82109375, "signal/frontier_ece_reward/group_std_mean": 0.009653137251734733, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006669469643384218, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006669469643384218, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26615132987499235, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.739453125, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3430874884128571, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026615133881568907, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026615133881568907, "step": 205 }, { "calibration/aurc": 0.2996998322733083, "calibration/batch_distribution_entropy": 0.9712911308832048, "calibration/batch_entropy_100bins": 0.9581911832477289, "calibration/batch_entropy_10bins": 0.9712911308832048, "calibration/batch_entropy_50bins": 0.9694983488466955, "calibration/batch_uniqueness": 0.9568363156374307, "calibration/buffer_distribution_entropy": 0.9989196857105181, "calibration/buffer_entropy_100bins": 0.9907594188460136, "calibration/buffer_entropy_10bins": 0.9989196857105181, "calibration/buffer_entropy_50bins": 0.9950373549425547, "calibration/confidence_entropy": 0.49656147261144473, "calibration/coverage@0%": 0.01328125, "calibration/coverage@1%": 0.01328125, "calibration/coverage@10%": 0.12422639432485323, "calibration/coverage@15%": 0.16836701932485323, "calibration/coverage@20%": 0.2230545193248532, "calibration/coverage@25%": 0.3297280149217221, "calibration/coverage@30%": 0.4953736545988258, "calibration/coverage@5%": 0.031640625, "calibration/ece": 0.13407407064959234, "calibration/mean_confidence": 0.4797791452804888, "calibration/prompt_uniqueness": 0.8583812638202394, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 828.0, "completions/max_terminated_length": 688.2, "completions/mean_length": 188.770703125, "completions/mean_terminated_length": 188.6397918701172, "completions/min_length": 87.4, "completions/min_terminated_length": 87.4, "epoch": 0.672, "grad_norm": 0.0009502097382210195, "learning_rate": 1e-06, "loss": 0.0003, "num_tokens": 704152680.0, "reward": 0.9265612006187439, "reward_std": 0.07872170060873032, "rewards/accuracy_reward": 0.5279296875, "rewards/brier_reward": 0.7858775019645691, "rewards/confidence_uniqueness_reward": 0.9564463257789612, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.00274044550023973, "rewards/frontier_coverage_0": 0.11836416125297547, "rewards/frontier_coverage_1": 0.11836416125297547, "rewards/frontier_coverage_10": 0.11836416125297547, "rewards/frontier_coverage_15": 0.11800117641687394, "rewards/frontier_coverage_20": 0.10758722573518753, "rewards/frontier_coverage_25": 0.0648583009839058, "rewards/frontier_coverage_5": 0.11836416125297547, "rewards/frontier_ece_reward": 0.003610279364511371, "rewards/frontier_entropy_batch_reward": -0.21462770104408263, "signal/accuracy_reward/centered_abs_mean": 0.08948974609375, "signal/accuracy_reward/group_bin_occupancy": 0.167578125, "signal/accuracy_reward/group_std_mean": 0.11885513663291931, "signal/accuracy_reward/group_zero_std_frac": 0.659375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.044744873046875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.044744873046875, "signal/advantage_abs_mean": 0.060820522159337996, "signal/advantage_pre_scale_abs_mean": 0.060820522159337996, "signal/advantage_pre_scale_std": 0.09661759734153748, "signal/advantage_std": 0.09661759734153748, "signal/brier_reward/centered_abs_mean": 0.1223609670996666, "signal/brier_reward/group_bin_occupancy": 0.8421875, "signal/brier_reward/group_std_mean": 0.15609249770641326, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012236096523702144, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012236096523702144, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.013496090844273567, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.926953125, "signal/confidence_uniqueness_reward/group_std_mean": 0.01729346551001072, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013496090890839697, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013496090890839697, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_bin_occupancy": 0.125390625, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.0024223918560892345, "signal/frontier_aurc_reward/group_bin_occupancy": 0.705859375, "signal/frontier_aurc_reward/group_std_mean": 0.004061613464727998, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.027989914698992e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.027989914698992e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.17780146598815919, "signal/frontier_coverage_0/group_bin_occupancy": 0.86328125, "signal/frontier_coverage_0/group_std_mean": 0.2251005709171295, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002222518343478441, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002222518343478441, "signal/frontier_coverage_1/centered_abs_mean": 0.17780146598815919, "signal/frontier_coverage_1/group_bin_occupancy": 0.86328125, "signal/frontier_coverage_1/group_std_mean": 0.2251005709171295, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002222518343478441, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002222518343478441, "signal/frontier_coverage_10/centered_abs_mean": 0.17780146598815919, "signal/frontier_coverage_10/group_bin_occupancy": 0.86328125, "signal/frontier_coverage_10/group_std_mean": 0.2251005709171295, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002222518343478441, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002222518343478441, "signal/frontier_coverage_15/centered_abs_mean": 0.17655244171619416, "signal/frontier_coverage_15/group_bin_occupancy": 0.86328125, "signal/frontier_coverage_15/group_std_mean": 0.2235410749912262, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0022069055587053297, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022069055587053297, "signal/frontier_coverage_20/centered_abs_mean": 0.15346194803714752, "signal/frontier_coverage_20/group_bin_occupancy": 0.85859375, "signal/frontier_coverage_20/group_std_mean": 0.19473823606967927, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0019182743271812797, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0019182743271812797, "signal/frontier_coverage_25/centered_abs_mean": 0.08100719451904297, "signal/frontier_coverage_25/group_bin_occupancy": 0.896875, "signal/frontier_coverage_25/group_std_mean": 0.10340845137834549, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010125899803824722, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010125899803824722, "signal/frontier_coverage_5/centered_abs_mean": 0.17780146598815919, "signal/frontier_coverage_5/group_bin_occupancy": 0.86328125, "signal/frontier_coverage_5/group_std_mean": 0.2251005709171295, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002222518343478441, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002222518343478441, "signal/frontier_ece_reward/centered_abs_mean": 0.007257478311657906, "signal/frontier_ece_reward/group_bin_occupancy": 0.83125, "signal/frontier_ece_reward/group_std_mean": 0.010310792177915574, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007257478660903871, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007257478660903871, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2733268320560455, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.721484375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.34998972415924073, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.027332685142755508, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027332685142755508, "step": 210 }, { "calibration/aurc": 0.334584462013137, "calibration/batch_distribution_entropy": 0.981975898164209, "calibration/batch_entropy_100bins": 0.9626993539936158, "calibration/batch_entropy_10bins": 0.981975898164209, "calibration/batch_entropy_50bins": 0.9758907463855657, "calibration/batch_uniqueness": 0.9594284057909366, "calibration/buffer_distribution_entropy": 0.9989722834263842, "calibration/buffer_entropy_100bins": 0.9911441184383936, "calibration/buffer_entropy_10bins": 0.9989722834263842, "calibration/buffer_entropy_50bins": 0.9952248695727167, "calibration/confidence_entropy": 0.49724886655592815, "calibration/coverage@0%": 0.0042976394324853225, "calibration/coverage@1%": 0.0042976394324853225, "calibration/coverage@10%": 0.041407014432485324, "calibration/coverage@15%": 0.13792196673189822, "calibration/coverage@20%": 0.3537006176614481, "calibration/coverage@25%": 0.41976363747553813, "calibration/coverage@30%": 0.6041638637475538, "calibration/coverage@5%": 0.019532014432485322, "calibration/ece": 0.13218861654477354, "calibration/mean_confidence": 0.5038023206802205, "calibration/prompt_uniqueness": 0.863943656103668, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 1064.4, "completions/max_terminated_length": 782.0, "completions/mean_length": 191.0904296875, "completions/mean_terminated_length": 190.82761840820314, "completions/min_length": 85.8, "completions/min_terminated_length": 85.8, "epoch": 0.688, "grad_norm": 0.0010152794420719147, "learning_rate": 1e-06, "loss": 0.0006, "num_tokens": 721063366.0, "reward": 0.9351613402366639, "reward_std": 0.08508041054010392, "rewards/accuracy_reward": 0.54443359375, "rewards/brier_reward": 0.7874362349510193, "rewards/confidence_uniqueness_reward": 0.9589925885200501, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.003119149315170944, "rewards/frontier_coverage_0": 0.10003266781568527, "rewards/frontier_coverage_1": 0.10003266781568527, "rewards/frontier_coverage_10": 0.10003266781568527, "rewards/frontier_coverage_15": 0.09940593391656875, "rewards/frontier_coverage_20": 0.0850291058421135, "rewards/frontier_coverage_25": 0.048729277402162555, "rewards/frontier_coverage_5": 0.10003266781568527, "rewards/frontier_ece_reward": 0.004082085704430938, "rewards/frontier_entropy_batch_reward": -0.198372682929039, "signal/accuracy_reward/centered_abs_mean": 0.099102783203125, "signal/accuracy_reward/group_bin_occupancy": 0.1734375, "signal/accuracy_reward/group_std_mean": 0.13263332694768906, "signal/accuracy_reward/group_zero_std_frac": 0.6125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0495513916015625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0495513916015625, "signal/advantage_abs_mean": 0.06450984179973603, "signal/advantage_pre_scale_abs_mean": 0.06450984179973603, "signal/advantage_pre_scale_std": 0.10448751300573349, "signal/advantage_std": 0.10448751300573349, "signal/brier_reward/centered_abs_mean": 0.12233641296625138, "signal/brier_reward/group_bin_occupancy": 0.85078125, "signal/brier_reward/group_std_mean": 0.15672328174114228, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012233641929924488, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012233641929924488, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012578487582504749, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.9265625, "signal/confidence_uniqueness_reward/group_std_mean": 0.016628415510058402, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012578487861901523, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012578487861901523, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_bin_occupancy": 0.126171875, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.002936544781550765, "signal/frontier_aurc_reward/group_bin_occupancy": 0.7078125, "signal/frontier_aurc_reward/group_std_mean": 0.004950050543993711, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.670681326184422e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.670681326184422e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.16857316195964814, "signal/frontier_coverage_0/group_bin_occupancy": 0.86484375, "signal/frontier_coverage_0/group_std_mean": 0.21436219811439514, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0021071645431220533, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021071645431220533, "signal/frontier_coverage_1/centered_abs_mean": 0.16857316195964814, "signal/frontier_coverage_1/group_bin_occupancy": 0.86484375, "signal/frontier_coverage_1/group_std_mean": 0.21436219811439514, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0021071645431220533, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021071645431220533, "signal/frontier_coverage_10/centered_abs_mean": 0.16857316195964814, "signal/frontier_coverage_10/group_bin_occupancy": 0.86484375, "signal/frontier_coverage_10/group_std_mean": 0.21436219811439514, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0021071645431220533, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0021071645431220533, "signal/frontier_coverage_15/centered_abs_mean": 0.16726841926574706, "signal/frontier_coverage_15/group_bin_occupancy": 0.8640625, "signal/frontier_coverage_15/group_std_mean": 0.21273342669010162, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00209085529204458, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00209085529204458, "signal/frontier_coverage_20/centered_abs_mean": 0.14143361896276474, "signal/frontier_coverage_20/group_bin_occupancy": 0.851953125, "signal/frontier_coverage_20/group_std_mean": 0.1803019016981125, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0017679202603176237, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0017679202603176237, "signal/frontier_coverage_25/centered_abs_mean": 0.07234455198049546, "signal/frontier_coverage_25/group_bin_occupancy": 0.90390625, "signal/frontier_coverage_25/group_std_mean": 0.09286017566919327, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009043069556355476, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009043069556355476, "signal/frontier_coverage_5/centered_abs_mean": 0.16857316195964814, "signal/frontier_coverage_5/group_bin_occupancy": 0.86484375, "signal/frontier_coverage_5/group_std_mean": 0.21436219811439514, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0021071645431220533, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021071645431220533, "signal/frontier_ece_reward/centered_abs_mean": 0.007802222948521375, "signal/frontier_ece_reward/group_bin_occupancy": 0.825390625, "signal/frontier_ece_reward/group_std_mean": 0.011409426480531693, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007802223321050405, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007802223321050405, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2717812657356262, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.730859375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.34891357421875, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02717812769114971, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02717812769114971, "step": 215 }, { "calibration/aurc": 0.2431222558267499, "calibration/batch_distribution_entropy": 0.9704915481336872, "calibration/batch_entropy_100bins": 0.95218267752457, "calibration/batch_entropy_10bins": 0.9704915481336872, "calibration/batch_entropy_50bins": 0.9658659766924742, "calibration/batch_uniqueness": 0.9607759885101235, "calibration/buffer_distribution_entropy": 0.9988664602008029, "calibration/buffer_entropy_100bins": 0.9911952395475305, "calibration/buffer_entropy_10bins": 0.9988664602008029, "calibration/buffer_entropy_50bins": 0.9952006753696396, "calibration/confidence_entropy": 0.4901460074658397, "calibration/coverage@0%": 0.016022504892367905, "calibration/coverage@1%": 0.016022504892367905, "calibration/coverage@10%": 0.1179756298923679, "calibration/coverage@15%": 0.22891848091976516, "calibration/coverage@20%": 0.37267841854207434, "calibration/coverage@25%": 0.5801140533268102, "calibration/coverage@30%": 0.6719300391389432, "calibration/coverage@5%": 0.048053754892367906, "calibration/ece": 0.11020522398245616, "calibration/mean_confidence": 0.5436369682894794, "calibration/prompt_uniqueness": 0.8631135779786681, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 753.2, "completions/max_terminated_length": 586.6, "completions/mean_length": 189.27822265625, "completions/mean_terminated_length": 189.1458312988281, "completions/min_length": 83.6, "completions/min_terminated_length": 83.6, "epoch": 0.704, "grad_norm": 0.0008683862979523838, "learning_rate": 1e-06, "loss": 0.0005, "num_tokens": 737867719.0, "reward": 0.9372278213500976, "reward_std": 0.08068549633026123, "rewards/accuracy_reward": 0.54736328125, "rewards/brier_reward": 0.7968594074249268, "rewards/confidence_uniqueness_reward": 0.9608587741851806, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.003032087814062834, "rewards/frontier_coverage_0": 0.09889980629086495, "rewards/frontier_coverage_1": 0.09889980629086495, "rewards/frontier_coverage_10": 0.09889980629086495, "rewards/frontier_coverage_15": 0.09856819957494736, "rewards/frontier_coverage_20": 0.08530885577201844, "rewards/frontier_coverage_25": 0.051339687407016756, "rewards/frontier_coverage_5": 0.09889980629086495, "rewards/frontier_ece_reward": 0.004148419946432114, "rewards/frontier_entropy_batch_reward": -0.20438967049121856, "signal/accuracy_reward/centered_abs_mean": 0.083721923828125, "signal/accuracy_reward/group_bin_occupancy": 0.16640625, "signal/accuracy_reward/group_std_mean": 0.11213247925043106, "signal/accuracy_reward/group_zero_std_frac": 0.66875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0418609619140625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0418609619140625, "signal/advantage_abs_mean": 0.06281042322516442, "signal/advantage_pre_scale_abs_mean": 0.06281042322516442, "signal/advantage_pre_scale_std": 0.10061680972576141, "signal/advantage_std": 0.10061680972576141, "signal/brier_reward/centered_abs_mean": 0.11816587895154954, "signal/brier_reward/group_bin_occupancy": 0.853125, "signal/brier_reward/group_std_mean": 0.1513270229101181, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01181658823043108, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01181658823043108, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.011747047305107117, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.9234375, "signal/confidence_uniqueness_reward/group_std_mean": 0.015102297998964787, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001174704753793776, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001174704753793776, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_bin_occupancy": 0.125390625, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.002884101867675781, "signal/frontier_aurc_reward/group_bin_occupancy": 0.71328125, "signal/frontier_aurc_reward/group_std_mean": 0.004772100504487753, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.6051273491466417e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.6051273491466417e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.15133111774921418, "signal/frontier_coverage_0/group_bin_occupancy": 0.865625, "signal/frontier_coverage_0/group_std_mean": 0.1936686307191849, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0018916390370577573, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0018916390370577573, "signal/frontier_coverage_1/centered_abs_mean": 0.15133111774921418, "signal/frontier_coverage_1/group_bin_occupancy": 0.865625, "signal/frontier_coverage_1/group_std_mean": 0.1936686307191849, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0018916390370577573, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0018916390370577573, "signal/frontier_coverage_10/centered_abs_mean": 0.15133111774921418, "signal/frontier_coverage_10/group_bin_occupancy": 0.865625, "signal/frontier_coverage_10/group_std_mean": 0.1936686307191849, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018916390370577573, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018916390370577573, "signal/frontier_coverage_15/centered_abs_mean": 0.15005215704441072, "signal/frontier_coverage_15/group_bin_occupancy": 0.864453125, "signal/frontier_coverage_15/group_std_mean": 0.19203002452850343, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0018756520003080368, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018756520003080368, "signal/frontier_coverage_20/centered_abs_mean": 0.1224544808268547, "signal/frontier_coverage_20/group_bin_occupancy": 0.85546875, "signal/frontier_coverage_20/group_std_mean": 0.15708767175674437, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0015306809917092323, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015306809917092323, "signal/frontier_coverage_25/centered_abs_mean": 0.06395273804664611, "signal/frontier_coverage_25/group_bin_occupancy": 0.912890625, "signal/frontier_coverage_25/group_std_mean": 0.08253951072692871, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007994092302396894, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007994092302396894, "signal/frontier_coverage_5/centered_abs_mean": 0.15133111774921418, "signal/frontier_coverage_5/group_bin_occupancy": 0.865625, "signal/frontier_coverage_5/group_std_mean": 0.1936686307191849, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018916390370577573, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018916390370577573, "signal/frontier_ece_reward/centered_abs_mean": 0.007144089136272669, "signal/frontier_ece_reward/group_bin_occupancy": 0.826171875, "signal/frontier_ece_reward/group_std_mean": 0.010504491440951825, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007144089206121862, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007144089206121862, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.265256404876709, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.73515625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.339794796705246, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02652563974261284, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02652563974261284, "step": 220 }, { "calibration/aurc": 0.23301790118207893, "calibration/batch_distribution_entropy": 0.9875498553783582, "calibration/batch_entropy_100bins": 0.9644342655540867, "calibration/batch_entropy_10bins": 0.9875498553783582, "calibration/batch_entropy_50bins": 0.9791999063823041, "calibration/batch_uniqueness": 0.9621734619140625, "calibration/buffer_distribution_entropy": 0.998861625242886, "calibration/buffer_entropy_100bins": 0.9913135794668897, "calibration/buffer_entropy_10bins": 0.998861625242886, "calibration/buffer_entropy_50bins": 0.9952531135075292, "calibration/confidence_entropy": 0.5181585521770237, "calibration/coverage@0%": 0.0671875, "calibration/coverage@1%": 0.08359375, "calibration/coverage@10%": 0.22265625, "calibration/coverage@15%": 0.281640625, "calibration/coverage@20%": 0.43515625, "calibration/coverage@25%": 0.585546875, "calibration/coverage@30%": 0.692578125, "calibration/coverage@5%": 0.160546875, "calibration/ece": 0.131002530713914, "calibration/mean_confidence": 0.5249371858462698, "calibration/prompt_uniqueness": 0.870068359375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 731.4, "completions/max_terminated_length": 731.4, "completions/mean_length": 191.42177734375, "completions/mean_terminated_length": 191.42177734375, "completions/min_length": 82.6, "completions/min_terminated_length": 82.6, "epoch": 0.72, "grad_norm": 0.0009743034606799483, "learning_rate": 1e-06, "loss": 0.0007, "num_tokens": 754837734.0, "reward": 0.9488389015197753, "reward_std": 0.08193524926900864, "rewards/accuracy_reward": 0.57021484375, "rewards/brier_reward": 0.8064169526100159, "rewards/confidence_uniqueness_reward": 0.9598495483398437, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.0021567588206380605, "rewards/frontier_coverage_0": 0.08862596154212951, "rewards/frontier_coverage_1": 0.08862596154212951, "rewards/frontier_coverage_10": 0.08861215263605118, "rewards/frontier_coverage_15": 0.0873618446290493, "rewards/frontier_coverage_20": 0.07677424550056458, "rewards/frontier_coverage_25": 0.04842212200164795, "rewards/frontier_coverage_5": 0.08862596154212951, "rewards/frontier_ece_reward": 0.0034100091550499203, "rewards/frontier_entropy_batch_reward": -0.20297325849533082, "signal/accuracy_reward/centered_abs_mean": 0.086773681640625, "signal/accuracy_reward/group_bin_occupancy": 0.17109375, "signal/accuracy_reward/group_std_mean": 0.12210773676633835, "signal/accuracy_reward/group_zero_std_frac": 0.63125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0433868408203125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0433868408203125, "signal/advantage_abs_mean": 0.061074144393205645, "signal/advantage_pre_scale_abs_mean": 0.061074144393205645, "signal/advantage_pre_scale_std": 0.09964745044708252, "signal/advantage_std": 0.09964745044708252, "signal/brier_reward/centered_abs_mean": 0.10820089429616928, "signal/brier_reward/group_bin_occupancy": 0.8640625, "signal/brier_reward/group_std_mean": 0.1395553916692734, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010820089280605317, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010820089280605317, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012313938140869141, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.9390625, "signal/confidence_uniqueness_reward/group_std_mean": 0.015382156148552895, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012313938699662686, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012313938699662686, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_bin_occupancy": 0.125, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0019083557184785605, "signal/frontier_aurc_reward/group_bin_occupancy": 0.70859375, "signal/frontier_aurc_reward/group_std_mean": 0.003126844298094511, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.385444749961607e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.385444749961607e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.15149271190166474, "signal/frontier_coverage_0/group_bin_occupancy": 0.875, "signal/frontier_coverage_0/group_std_mean": 0.19624074995517732, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001893658982589841, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001893658982589841, "signal/frontier_coverage_1/centered_abs_mean": 0.15149271190166474, "signal/frontier_coverage_1/group_bin_occupancy": 0.875, "signal/frontier_coverage_1/group_std_mean": 0.19624074995517732, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001893658982589841, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001893658982589841, "signal/frontier_coverage_10/centered_abs_mean": 0.15089576244354247, "signal/frontier_coverage_10/group_bin_occupancy": 0.874609375, "signal/frontier_coverage_10/group_std_mean": 0.1954701155424118, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018861971329897642, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018861971329897642, "signal/frontier_coverage_15/centered_abs_mean": 0.14784342050552368, "signal/frontier_coverage_15/group_bin_occupancy": 0.874609375, "signal/frontier_coverage_15/group_std_mean": 0.19153738617897034, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0018480427097529173, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018480427097529173, "signal/frontier_coverage_20/centered_abs_mean": 0.11549332290887833, "signal/frontier_coverage_20/group_bin_occupancy": 0.87109375, "signal/frontier_coverage_20/group_std_mean": 0.14993580281734467, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014436665922403335, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014436665922403335, "signal/frontier_coverage_25/centered_abs_mean": 0.0574177585542202, "signal/frontier_coverage_25/group_bin_occupancy": 0.91015625, "signal/frontier_coverage_25/group_std_mean": 0.07405912727117539, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007177219958975911, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007177219958975911, "signal/frontier_coverage_5/centered_abs_mean": 0.15149271190166474, "signal/frontier_coverage_5/group_bin_occupancy": 0.875, "signal/frontier_coverage_5/group_std_mean": 0.19624074995517732, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001893658982589841, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001893658982589841, "signal/frontier_ece_reward/centered_abs_mean": 0.006441084947437048, "signal/frontier_ece_reward/group_bin_occupancy": 0.840625, "signal/frontier_ece_reward/group_std_mean": 0.009463933855295181, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006441084784455598, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006441084784455598, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27501477003097535, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.734375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3496582627296448, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.027501478046178817, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027501478046178817, "step": 225 }, { "calibration/aurc": 0.25028817244424917, "calibration/batch_distribution_entropy": 0.9743689912131274, "calibration/batch_entropy_100bins": 0.9518182314907483, "calibration/batch_entropy_10bins": 0.9743689912131274, "calibration/batch_entropy_50bins": 0.9659027223777581, "calibration/batch_uniqueness": 0.9613949453726036, "calibration/buffer_distribution_entropy": 0.9989980442369921, "calibration/buffer_entropy_100bins": 0.9914707354706082, "calibration/buffer_entropy_10bins": 0.9989980442369921, "calibration/buffer_entropy_50bins": 0.9953458202834676, "calibration/confidence_entropy": 0.4951727113718006, "calibration/coverage@0%": 0.007818615459882583, "calibration/coverage@1%": 0.007818615459882583, "calibration/coverage@10%": 0.11291050024461839, "calibration/coverage@15%": 0.1961281494618395, "calibration/coverage@20%": 0.4301194043542075, "calibration/coverage@25%": 0.5449677409491194, "calibration/coverage@30%": 0.6873287671232877, "calibration/coverage@5%": 0.04455112524461839, "calibration/ece": 0.13390157547439757, "calibration/mean_confidence": 0.5367679906823735, "calibration/prompt_uniqueness": 0.8548456197970864, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 970.4, "completions/max_terminated_length": 763.0, "completions/mean_length": 190.246484375, "completions/mean_terminated_length": 190.1147430419922, "completions/min_length": 85.4, "completions/min_terminated_length": 85.4, "epoch": 0.736, "grad_norm": 0.0006948218797333539, "learning_rate": 1e-06, "loss": 0.0007, "num_tokens": 771725442.0, "reward": 0.9490613460540771, "reward_std": 0.0752700299024582, "rewards/accuracy_reward": 0.5681640625, "rewards/brier_reward": 0.8009598612785339, "rewards/confidence_uniqueness_reward": 0.9616155385971069, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.002585971378721297, "rewards/frontier_coverage_0": 0.09696303457021713, "rewards/frontier_coverage_1": 0.09696303457021713, "rewards/frontier_coverage_10": 0.09685205966234207, "rewards/frontier_coverage_15": 0.09585188180208207, "rewards/frontier_coverage_20": 0.08253547102212906, "rewards/frontier_coverage_25": 0.05373050421476364, "rewards/frontier_coverage_5": 0.09696303457021713, "rewards/frontier_ece_reward": 0.0037560143042355775, "rewards/frontier_entropy_batch_reward": -0.19320926070213318, "signal/accuracy_reward/centered_abs_mean": 0.0760009765625, "signal/accuracy_reward/group_bin_occupancy": 0.162890625, "signal/accuracy_reward/group_std_mean": 0.10297959595918656, "signal/accuracy_reward/group_zero_std_frac": 0.696875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.03800048828125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.03800048828125, "signal/advantage_abs_mean": 0.058193684369325635, "signal/advantage_pre_scale_abs_mean": 0.058193684369325635, "signal/advantage_pre_scale_std": 0.09324042946100235, "signal/advantage_std": 0.09324042946100235, "signal/brier_reward/centered_abs_mean": 0.11048106700181962, "signal/brier_reward/group_bin_occupancy": 0.84609375, "signal/brier_reward/group_std_mean": 0.14282523095607758, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011048106662929057, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011048106662929057, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.011998776532709598, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.912109375, "signal/confidence_uniqueness_reward/group_std_mean": 0.015458272024989127, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0011998776812106372, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0011998776812106372, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_bin_occupancy": 0.125390625, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.0023550010519102216, "signal/frontier_aurc_reward/group_bin_occupancy": 0.721484375, "signal/frontier_aurc_reward/group_std_mean": 0.004158449545502663, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.9437512421282008e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.9437512421282008e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.15034229159355164, "signal/frontier_coverage_0/group_bin_occupancy": 0.8640625, "signal/frontier_coverage_0/group_std_mean": 0.19372088611125945, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0018792787101119756, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0018792787101119756, "signal/frontier_coverage_1/centered_abs_mean": 0.15034229159355164, "signal/frontier_coverage_1/group_bin_occupancy": 0.8640625, "signal/frontier_coverage_1/group_std_mean": 0.19372088611125945, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0018792787101119756, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0018792787101119756, "signal/frontier_coverage_10/centered_abs_mean": 0.1496051698923111, "signal/frontier_coverage_10/group_bin_occupancy": 0.8640625, "signal/frontier_coverage_10/group_std_mean": 0.1928351491689682, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001870064646936953, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001870064646936953, "signal/frontier_coverage_15/centered_abs_mean": 0.1457270860671997, "signal/frontier_coverage_15/group_bin_occupancy": 0.86171875, "signal/frontier_coverage_15/group_std_mean": 0.18802883327007294, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0018215886317193507, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018215886317193507, "signal/frontier_coverage_20/centered_abs_mean": 0.11060539782047271, "signal/frontier_coverage_20/group_bin_occupancy": 0.860546875, "signal/frontier_coverage_20/group_std_mean": 0.1432916909456253, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013825674774125218, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013825674774125218, "signal/frontier_coverage_25/centered_abs_mean": 0.05945408642292023, "signal/frontier_coverage_25/group_bin_occupancy": 0.9125, "signal/frontier_coverage_25/group_std_mean": 0.0763387769460678, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007431761012412607, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007431761012412607, "signal/frontier_coverage_5/centered_abs_mean": 0.15034229159355164, "signal/frontier_coverage_5/group_bin_occupancy": 0.8640625, "signal/frontier_coverage_5/group_std_mean": 0.19372088611125945, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018792787101119756, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018792787101119756, "signal/frontier_ece_reward/centered_abs_mean": 0.006753822509199381, "signal/frontier_ece_reward/group_bin_occupancy": 0.82734375, "signal/frontier_ece_reward/group_std_mean": 0.010070707648992538, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006753822672180831, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006753822672180831, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26861504912376405, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3452408015727997, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02686150446534157, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02686150446534157, "step": 230 }, { "calibration/aurc": 0.256449338221134, "calibration/batch_distribution_entropy": 0.968129743999475, "calibration/batch_entropy_100bins": 0.9535105366339899, "calibration/batch_entropy_10bins": 0.968129743999475, "calibration/batch_entropy_50bins": 0.9658034715240916, "calibration/batch_uniqueness": 0.959322589602819, "calibration/buffer_distribution_entropy": 0.9990122658119412, "calibration/buffer_entropy_100bins": 0.9914938682880787, "calibration/buffer_entropy_10bins": 0.9990122658119412, "calibration/buffer_entropy_50bins": 0.9952962057881454, "calibration/confidence_entropy": 0.4566113101334025, "calibration/coverage@0%": 0.014465355919765166, "calibration/coverage@1%": 0.014465355919765166, "calibration/coverage@10%": 0.17503516389432486, "calibration/coverage@15%": 0.3086740154109589, "calibration/coverage@20%": 0.44046370474559693, "calibration/coverage@25%": 0.5280225660469667, "calibration/coverage@30%": 0.6393850905088063, "calibration/coverage@5%": 0.059005014677103715, "calibration/ece": 0.13985793070933042, "calibration/mean_confidence": 0.4696687177806269, "calibration/prompt_uniqueness": 0.851551728342872, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 949.0, "completions/max_terminated_length": 544.6, "completions/mean_length": 188.6611328125, "completions/mean_terminated_length": 188.26605224609375, "completions/min_length": 87.2, "completions/min_terminated_length": 87.2, "epoch": 0.752, "grad_norm": 0.0008416337659582496, "learning_rate": 1e-06, "loss": 0.001, "num_tokens": 788884532.0, "reward": 0.9455557703971863, "reward_std": 0.08068245649337769, "rewards/accuracy_reward": 0.566015625, "rewards/brier_reward": 0.7956305265426635, "rewards/confidence_uniqueness_reward": 0.9623886108398437, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.003054162277840078, "rewards/frontier_coverage_0": 0.10228811725974082, "rewards/frontier_coverage_1": 0.10228811725974082, "rewards/frontier_coverage_10": 0.10170512199401856, "rewards/frontier_coverage_15": 0.09902632944285869, "rewards/frontier_coverage_20": 0.07840342242270708, "rewards/frontier_coverage_25": 0.050577325746417046, "rewards/frontier_coverage_5": 0.10181083604693413, "rewards/frontier_ece_reward": 0.0037714077159762384, "rewards/frontier_entropy_batch_reward": -0.21397663354873658, "signal/accuracy_reward/centered_abs_mean": 0.0837158203125, "signal/accuracy_reward/group_bin_occupancy": 0.1671875, "signal/accuracy_reward/group_std_mean": 0.11319768130779266, "signal/accuracy_reward/group_zero_std_frac": 0.6625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04185791015625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04185791015625, "signal/advantage_abs_mean": 0.06239245980978012, "signal/advantage_pre_scale_abs_mean": 0.06239245980978012, "signal/advantage_pre_scale_std": 0.10159500986337662, "signal/advantage_std": 0.10159500986337662, "signal/brier_reward/centered_abs_mean": 0.113985575735569, "signal/brier_reward/group_bin_occupancy": 0.82890625, "signal/brier_reward/group_std_mean": 0.14819374084472656, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011398557387292386, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011398557387292386, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012159938551485538, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.91640625, "signal/confidence_uniqueness_reward/group_std_mean": 0.01617111321538687, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012159939156845211, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012159939156845211, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_bin_occupancy": 0.126171875, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.0029334662482142448, "signal/frontier_aurc_reward/group_bin_occupancy": 0.72265625, "signal/frontier_aurc_reward/group_std_mean": 0.00483027109876275, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.666832781163975e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.666832781163975e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.15212544202804565, "signal/frontier_coverage_0/group_bin_occupancy": 0.853125, "signal/frontier_coverage_0/group_std_mean": 0.19812886118888856, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0019015680765733123, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019015680765733123, "signal/frontier_coverage_1/centered_abs_mean": 0.15212544202804565, "signal/frontier_coverage_1/group_bin_occupancy": 0.853125, "signal/frontier_coverage_1/group_std_mean": 0.19812886118888856, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019015680765733123, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019015680765733123, "signal/frontier_coverage_10/centered_abs_mean": 0.15115560591220856, "signal/frontier_coverage_10/group_bin_occupancy": 0.84921875, "signal/frontier_coverage_10/group_std_mean": 0.19689476490020752, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018894450971856714, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018894450971856714, "signal/frontier_coverage_15/centered_abs_mean": 0.14616797864437103, "signal/frontier_coverage_15/group_bin_occupancy": 0.84921875, "signal/frontier_coverage_15/group_std_mean": 0.1905912697315216, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001827099802903831, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001827099802903831, "signal/frontier_coverage_20/centered_abs_mean": 0.10996298342943192, "signal/frontier_coverage_20/group_bin_occupancy": 0.843359375, "signal/frontier_coverage_20/group_std_mean": 0.14416728615760804, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013745372649282216, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013745372649282216, "signal/frontier_coverage_25/centered_abs_mean": 0.05818985775113106, "signal/frontier_coverage_25/group_bin_occupancy": 0.911328125, "signal/frontier_coverage_25/group_std_mean": 0.07572825103998185, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007273732335306704, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007273732335306704, "signal/frontier_coverage_5/centered_abs_mean": 0.15179600417613984, "signal/frontier_coverage_5/group_bin_occupancy": 0.85078125, "signal/frontier_coverage_5/group_std_mean": 0.19772669970989226, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018974500941112637, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018974500941112637, "signal/frontier_ece_reward/centered_abs_mean": 0.007248471491038799, "signal/frontier_ece_reward/group_bin_occupancy": 0.81953125, "signal/frontier_ece_reward/group_std_mean": 0.010719313845038414, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007248471258208156, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007248471258208156, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2787540197372437, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.739453125, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3564418852329254, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.027875401824712754, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027875401824712754, "step": 235 }, { "calibration/aurc": 0.299067613553852, "calibration/batch_distribution_entropy": 0.9776422799775325, "calibration/batch_entropy_100bins": 0.9544932057058523, "calibration/batch_entropy_10bins": 0.9776422799775325, "calibration/batch_entropy_50bins": 0.9740477455082059, "calibration/batch_uniqueness": 0.9616119384765625, "calibration/buffer_distribution_entropy": 0.9990121183367439, "calibration/buffer_entropy_100bins": 0.9915586285603151, "calibration/buffer_entropy_10bins": 0.9990121183367439, "calibration/buffer_entropy_50bins": 0.9953215015311525, "calibration/confidence_entropy": 0.5014430602492166, "calibration/coverage@0%": 0.066015625, "calibration/coverage@1%": 0.066015625, "calibration/coverage@10%": 0.216015625, "calibration/coverage@15%": 0.26328125, "calibration/coverage@20%": 0.366015625, "calibration/coverage@25%": 0.455859375, "calibration/coverage@30%": 0.491015625, "calibration/coverage@5%": 0.090625, "calibration/ece": 0.16444165512890357, "calibration/mean_confidence": 0.47969150508930003, "calibration/prompt_uniqueness": 0.868505859375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 1003.0, "completions/max_terminated_length": 624.2, "completions/mean_length": 191.987890625, "completions/mean_terminated_length": 191.72521362304687, "completions/min_length": 80.8, "completions/min_terminated_length": 80.8, "epoch": 0.768, "grad_norm": 0.001026144833303988, "learning_rate": 1e-06, "loss": 0.0006, "num_tokens": 805783192.0, "reward": 0.9215254068374634, "reward_std": 0.07930080592632294, "rewards/accuracy_reward": 0.51201171875, "rewards/brier_reward": 0.8031093597412109, "rewards/confidence_uniqueness_reward": 0.9619287371635437, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.002836792590096593, "rewards/frontier_coverage_0": 0.13882942795753478, "rewards/frontier_coverage_1": 0.13882942795753478, "rewards/frontier_coverage_10": 0.13846020698547362, "rewards/frontier_coverage_15": 0.13496174067258834, "rewards/frontier_coverage_20": 0.10879542678594589, "rewards/frontier_coverage_25": 0.0587244875729084, "rewards/frontier_coverage_5": 0.13846020698547362, "rewards/frontier_ece_reward": 0.003680743183940649, "rewards/frontier_entropy_batch_reward": -0.21932466328144073, "signal/accuracy_reward/centered_abs_mean": 0.079571533203125, "signal/accuracy_reward/group_bin_occupancy": 0.166015625, "signal/accuracy_reward/group_std_mean": 0.10974450260400773, "signal/accuracy_reward/group_zero_std_frac": 0.671875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0397857666015625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0397857666015625, "signal/advantage_abs_mean": 0.0608487643301487, "signal/advantage_pre_scale_abs_mean": 0.0608487643301487, "signal/advantage_pre_scale_std": 0.09865092337131501, "signal/advantage_std": 0.09865092337131501, "signal/brier_reward/centered_abs_mean": 0.10833943039178848, "signal/brier_reward/group_bin_occupancy": 0.8515625, "signal/brier_reward/group_std_mean": 0.14004457592964173, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010833943635225296, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010833943635225296, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012927094288170338, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.91015625, "signal/confidence_uniqueness_reward/group_std_mean": 0.016739430651068688, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001292709424160421, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001292709424160421, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_bin_occupancy": 0.12578125, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0024011209141463043, "signal/frontier_aurc_reward/group_bin_occupancy": 0.7234375, "signal/frontier_aurc_reward/group_std_mean": 0.003994084335863591, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.001401055371389e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.001401055371389e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.15316719114780425, "signal/frontier_coverage_0/group_bin_occupancy": 0.879296875, "signal/frontier_coverage_0/group_std_mean": 0.19865505993366242, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001914589968509972, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001914589968509972, "signal/frontier_coverage_1/centered_abs_mean": 0.15316719114780425, "signal/frontier_coverage_1/group_bin_occupancy": 0.879296875, "signal/frontier_coverage_1/group_std_mean": 0.19865505993366242, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001914589968509972, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001914589968509972, "signal/frontier_coverage_10/centered_abs_mean": 0.15222469270229338, "signal/frontier_coverage_10/group_bin_occupancy": 0.880859375, "signal/frontier_coverage_10/group_std_mean": 0.19741056561470033, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001902808714658022, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001902808714658022, "signal/frontier_coverage_15/centered_abs_mean": 0.1450010806322098, "signal/frontier_coverage_15/group_bin_occupancy": 0.87578125, "signal/frontier_coverage_15/group_std_mean": 0.18793485462665557, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0018125135218724608, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018125135218724608, "signal/frontier_coverage_20/centered_abs_mean": 0.10850205421447753, "signal/frontier_coverage_20/group_bin_occupancy": 0.874609375, "signal/frontier_coverage_20/group_std_mean": 0.14055884182453154, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013562757056206464, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013562757056206464, "signal/frontier_coverage_25/centered_abs_mean": 0.05681398212909698, "signal/frontier_coverage_25/group_bin_occupancy": 0.921875, "signal/frontier_coverage_25/group_std_mean": 0.07298188954591751, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007101748022250831, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007101748022250831, "signal/frontier_coverage_5/centered_abs_mean": 0.15222469270229338, "signal/frontier_coverage_5/group_bin_occupancy": 0.880859375, "signal/frontier_coverage_5/group_std_mean": 0.19741056561470033, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001902808714658022, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001902808714658022, "signal/frontier_ece_reward/centered_abs_mean": 0.006277401559054851, "signal/frontier_ece_reward/group_bin_occupancy": 0.837109375, "signal/frontier_ece_reward/group_std_mean": 0.009239476174116135, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006277402047999203, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006277402047999203, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2777975261211395, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.729296875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.35362735390663147, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.027779752761125563, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027779752761125563, "step": 240 }, { "calibration/aurc": 0.32235280813281475, "calibration/batch_distribution_entropy": 0.9786483096457314, "calibration/batch_entropy_100bins": 0.9500265876570528, "calibration/batch_entropy_10bins": 0.9786483096457314, "calibration/batch_entropy_50bins": 0.9700209996178053, "calibration/batch_uniqueness": 0.9649993896484375, "calibration/buffer_distribution_entropy": 0.9989889119809028, "calibration/buffer_entropy_100bins": 0.9915393121863978, "calibration/buffer_entropy_10bins": 0.9989889119809028, "calibration/buffer_entropy_50bins": 0.9953476043016061, "calibration/confidence_entropy": 0.4872507579959541, "calibration/coverage@0%": 0.016796875, "calibration/coverage@1%": 0.016796875, "calibration/coverage@10%": 0.153515625, "calibration/coverage@15%": 0.271875, "calibration/coverage@20%": 0.31484375, "calibration/coverage@25%": 0.343359375, "calibration/coverage@30%": 0.3921875, "calibration/coverage@5%": 0.114453125, "calibration/ece": 0.15300725756631883, "calibration/mean_confidence": 0.509216670802342, "calibration/prompt_uniqueness": 0.8611328125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 1067.4, "completions/max_terminated_length": 785.6, "completions/mean_length": 189.0556640625, "completions/mean_terminated_length": 188.79233093261718, "completions/min_length": 81.2, "completions/min_terminated_length": 81.2, "epoch": 0.784, "grad_norm": 0.0008188265492208302, "learning_rate": 1e-06, "loss": 0.0008, "num_tokens": 822893490.0, "reward": 0.9416593551635742, "reward_std": 0.08285669684410095, "rewards/accuracy_reward": 0.5642578125, "rewards/brier_reward": 0.7843694448471069, "rewards/confidence_uniqueness_reward": 0.9650574326515198, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.003067029034718871, "rewards/frontier_coverage_0": 0.08356368988752365, "rewards/frontier_coverage_1": 0.08356368988752365, "rewards/frontier_coverage_10": 0.08326268717646598, "rewards/frontier_coverage_15": 0.07886564061045646, "rewards/frontier_coverage_20": 0.06277668662369251, "rewards/frontier_coverage_25": 0.044081108272075654, "rewards/frontier_coverage_5": 0.08318910598754883, "rewards/frontier_ece_reward": 0.002441513957455754, "rewards/frontier_entropy_batch_reward": -0.22011671662330629, "signal/accuracy_reward/centered_abs_mean": 0.09090576171875, "signal/accuracy_reward/group_bin_occupancy": 0.168359375, "signal/accuracy_reward/group_std_mean": 0.12054677605628968, "signal/accuracy_reward/group_zero_std_frac": 0.653125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.045452880859375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.045452880859375, "signal/advantage_abs_mean": 0.06448552757501602, "signal/advantage_pre_scale_abs_mean": 0.06448552757501602, "signal/advantage_pre_scale_std": 0.10121935606002808, "signal/advantage_std": 0.10121935606002808, "signal/brier_reward/centered_abs_mean": 0.1141832172870636, "signal/brier_reward/group_bin_occupancy": 0.8546875, "signal/brier_reward/group_std_mean": 0.14592179358005525, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011418322287499904, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011418322287499904, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012644784711301326, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8859375, "signal/confidence_uniqueness_reward/group_std_mean": 0.01663502026349306, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012644784990698099, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012644784990698099, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_bin_occupancy": 0.12578125, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0026621847413480283, "signal/frontier_aurc_reward/group_bin_occupancy": 0.728515625, "signal/frontier_aurc_reward/group_std_mean": 0.0041458617430180315, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.327731028548442e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.327731028548442e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.1563648372888565, "signal/frontier_coverage_0/group_bin_occupancy": 0.872265625, "signal/frontier_coverage_0/group_std_mean": 0.200226292014122, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001954560517333448, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001954560517333448, "signal/frontier_coverage_1/centered_abs_mean": 0.1563648372888565, "signal/frontier_coverage_1/group_bin_occupancy": 0.872265625, "signal/frontier_coverage_1/group_std_mean": 0.200226292014122, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001954560517333448, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001954560517333448, "signal/frontier_coverage_10/centered_abs_mean": 0.1553775906562805, "signal/frontier_coverage_10/group_bin_occupancy": 0.87265625, "signal/frontier_coverage_10/group_std_mean": 0.19900963306427003, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019422198878601194, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019422198878601194, "signal/frontier_coverage_15/centered_abs_mean": 0.1479180335998535, "signal/frontier_coverage_15/group_bin_occupancy": 0.869140625, "signal/frontier_coverage_15/group_std_mean": 0.18967563509941102, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0018489754293113947, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018489754293113947, "signal/frontier_coverage_20/centered_abs_mean": 0.10082450807094574, "signal/frontier_coverage_20/group_bin_occupancy": 0.866015625, "signal/frontier_coverage_20/group_std_mean": 0.1297900453209877, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001260306383483112, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001260306383483112, "signal/frontier_coverage_25/centered_abs_mean": 0.055328131467103955, "signal/frontier_coverage_25/group_bin_occupancy": 0.928125, "signal/frontier_coverage_25/group_std_mean": 0.07085389196872711, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006916016573086381, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006916016573086381, "signal/frontier_coverage_5/centered_abs_mean": 0.15557830333709716, "signal/frontier_coverage_5/group_bin_occupancy": 0.87265625, "signal/frontier_coverage_5/group_std_mean": 0.19926558434963226, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001944728777743876, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001944728777743876, "signal/frontier_ece_reward/centered_abs_mean": 0.006715606153011322, "signal/frontier_ece_reward/group_bin_occupancy": 0.83828125, "signal/frontier_ece_reward/group_std_mean": 0.009772182628512382, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006715606432408094, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006715606432408094, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2859824955463409, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.73125, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3597340643405914, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.028598250076174735, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.028598250076174735, "step": 245 }, { "calibration/aurc": 0.21915763575922814, "calibration/batch_distribution_entropy": 0.9810695570789931, "calibration/batch_entropy_100bins": 0.9486454306483759, "calibration/batch_entropy_10bins": 0.9810695570789931, "calibration/batch_entropy_50bins": 0.9722712681327159, "calibration/batch_uniqueness": 0.965252685546875, "calibration/buffer_distribution_entropy": 0.9990129339746321, "calibration/buffer_entropy_100bins": 0.9912903876248045, "calibration/buffer_entropy_10bins": 0.9990129339746321, "calibration/buffer_entropy_50bins": 0.9953314983930802, "calibration/confidence_entropy": 0.4940070201234362, "calibration/coverage@0%": 0.041015625, "calibration/coverage@1%": 0.041015625, "calibration/coverage@10%": 0.23515625, "calibration/coverage@15%": 0.365625, "calibration/coverage@20%": 0.521484375, "calibration/coverage@25%": 0.6296875, "calibration/coverage@30%": 0.7234375, "calibration/coverage@5%": 0.091015625, "calibration/ece": 0.1043095249640625, "calibration/mean_confidence": 0.5099823944953126, "calibration/prompt_uniqueness": 0.863818359375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 913.4, "completions/max_terminated_length": 492.4, "completions/mean_length": 185.3513671875, "completions/mean_terminated_length": 184.95630493164063, "completions/min_length": 88.0, "completions/min_terminated_length": 88.0, "epoch": 0.8, "grad_norm": 0.0009886363986879587, "learning_rate": 1e-06, "loss": 0.001, "num_tokens": 839802048.0, "reward": 0.9549939274787903, "reward_std": 0.08174641579389572, "rewards/accuracy_reward": 0.59150390625, "rewards/brier_reward": 0.809298062324524, "rewards/confidence_uniqueness_reward": 0.9655136346817017, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.00310264159925282, "rewards/frontier_coverage_0": 0.08750025108456612, "rewards/frontier_coverage_1": 0.08750025108456612, "rewards/frontier_coverage_10": 0.08737820237874985, "rewards/frontier_coverage_15": 0.08418880626559258, "rewards/frontier_coverage_20": 0.06368328407406806, "rewards/frontier_coverage_25": 0.04929944053292275, "rewards/frontier_coverage_5": 0.08737820237874985, "rewards/frontier_ece_reward": 0.0034091237001121046, "rewards/frontier_entropy_batch_reward": -0.25231444239616396, "signal/accuracy_reward/centered_abs_mean": 0.084820556640625, "signal/accuracy_reward/group_bin_occupancy": 0.165625, "signal/accuracy_reward/group_std_mean": 0.11230973601341247, "signal/accuracy_reward/group_zero_std_frac": 0.675, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0424102783203125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0424102783203125, "signal/advantage_abs_mean": 0.0636213093996048, "signal/advantage_pre_scale_abs_mean": 0.0636213093996048, "signal/advantage_pre_scale_std": 0.10311011075973511, "signal/advantage_std": 0.10311011075973511, "signal/brier_reward/centered_abs_mean": 0.1033732384443283, "signal/brier_reward/group_bin_occupancy": 0.84375, "signal/brier_reward/group_std_mean": 0.1343769446015358, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010337324067950248, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010337324067950248, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.013043990544974803, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.87109375, "signal/confidence_uniqueness_reward/group_std_mean": 0.017386937327682973, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001304399105720222, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001304399105720222, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_bin_occupancy": 0.126171875, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.00283603323623538, "signal/frontier_aurc_reward/group_bin_occupancy": 0.704296875, "signal/frontier_aurc_reward/group_std_mean": 0.0045263932552188635, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.545041545294225e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.545041545294225e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.13621854037046432, "signal/frontier_coverage_0/group_bin_occupancy": 0.8578125, "signal/frontier_coverage_0/group_std_mean": 0.17529793679714203, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0017027317779138684, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0017027317779138684, "signal/frontier_coverage_1/centered_abs_mean": 0.13621854037046432, "signal/frontier_coverage_1/group_bin_occupancy": 0.8578125, "signal/frontier_coverage_1/group_std_mean": 0.17529793679714203, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0017027317779138684, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0017027317779138684, "signal/frontier_coverage_10/centered_abs_mean": 0.13600390702486037, "signal/frontier_coverage_10/group_bin_occupancy": 0.858203125, "signal/frontier_coverage_10/group_std_mean": 0.17503868341445922, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0017000488704070448, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017000488704070448, "signal/frontier_coverage_15/centered_abs_mean": 0.12785129249095917, "signal/frontier_coverage_15/group_bin_occupancy": 0.859765625, "signal/frontier_coverage_15/group_std_mean": 0.16478919386863708, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0015981412259861826, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0015981412259861826, "signal/frontier_coverage_20/centered_abs_mean": 0.08313901722431183, "signal/frontier_coverage_20/group_bin_occupancy": 0.868359375, "signal/frontier_coverage_20/group_std_mean": 0.10833462625741959, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00103923772694543, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00103923772694543, "signal/frontier_coverage_25/centered_abs_mean": 0.05012721195816994, "signal/frontier_coverage_25/group_bin_occupancy": 0.926953125, "signal/frontier_coverage_25/group_std_mean": 0.06447599828243256, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006265901494771243, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006265901494771243, "signal/frontier_coverage_5/centered_abs_mean": 0.13600390702486037, "signal/frontier_coverage_5/group_bin_occupancy": 0.858203125, "signal/frontier_coverage_5/group_std_mean": 0.17503868341445922, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0017000488704070448, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0017000488704070448, "signal/frontier_ece_reward/centered_abs_mean": 0.007046621013432741, "signal/frontier_ece_reward/group_bin_occupancy": 0.829296875, "signal/frontier_ece_reward/group_std_mean": 0.010701733268797397, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.000704662105999887, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.000704662105999887, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.29779070019721987, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7203125, "signal/frontier_entropy_batch_reward/group_std_mean": 0.36754211187362673, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02977906949818134, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02977906949818134, "step": 250 }, { "epoch": 0.8, "eval_calibration/aurc": 0.4222622649766603, "eval_calibration/batch_distribution_entropy": 0.93831284978888, "eval_calibration/batch_entropy_100bins": 0.7008354586552061, "eval_calibration/batch_entropy_10bins": 0.93831284978888, "eval_calibration/batch_entropy_50bins": 0.7678645493443049, "eval_calibration/batch_uniqueness": 0.9052734375, "eval_calibration/buffer_distribution_entropy": 0.999063500343571, "eval_calibration/buffer_entropy_100bins": 0.9911236376097536, "eval_calibration/buffer_entropy_10bins": 0.999063500343571, "eval_calibration/buffer_entropy_50bins": 0.9954196166686817, "eval_calibration/confidence_entropy": 0.48469888985633597, "eval_calibration/coverage@0%": 0.0703125, "eval_calibration/coverage@1%": 0.0703125, "eval_calibration/coverage@10%": 0.0703125, "eval_calibration/coverage@15%": 0.0703125, "eval_calibration/coverage@20%": 0.0703125, "eval_calibration/coverage@25%": 0.140625, "eval_calibration/coverage@30%": 0.4296875, "eval_calibration/coverage@5%": 0.0703125, "eval_calibration/ece": 0.193647390625, "eval_calibration/mean_confidence": 0.47742710937499994, "eval_calibration/prompt_uniqueness": 0.9052734375, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 417.75, "eval_completions/max_terminated_length": 417.75, "eval_completions/mean_length": 189.96800994873047, "eval_completions/mean_terminated_length": 189.96800994873047, "eval_completions/min_length": 99.5, "eval_completions/min_terminated_length": 99.5, "eval_loss": 0.0, "eval_num_tokens": 839802048.0, "eval_reward": 0.8031501024961472, "eval_reward_std": 0.2302834540605545, "eval_rewards/accuracy_reward": 0.43359375, "eval_rewards/brier_reward": 0.8097970336675644, "eval_rewards/confidence_uniqueness_reward": 0.909912109375, "eval_rewards/format_reward": 1.0, "eval_rewards/frontier_aurc_reward": -0.0030607732478529215, "eval_rewards/frontier_coverage_0": 0.19227121397852898, "eval_rewards/frontier_coverage_1": 0.19227121397852898, "eval_rewards/frontier_coverage_10": 0.19090014696121216, "eval_rewards/frontier_coverage_15": 0.17611178383231163, "eval_rewards/frontier_coverage_20": 0.11594182625412941, "eval_rewards/frontier_coverage_25": 0.05809914506971836, "eval_rewards/frontier_coverage_5": 0.19090014696121216, "eval_rewards/frontier_ece_reward": 0.0046437275595963, "eval_rewards/frontier_entropy_batch_reward": -1.0, "eval_runtime": 21.1941, "eval_samples_per_second": 23.592, "eval_signal/accuracy_reward/centered_abs_mean": 0.474853515625, "eval_signal/accuracy_reward/group_bin_occupancy": 0.25, "eval_signal/accuracy_reward/group_std_mean": 0.4946432411670685, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2374267578125, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2374267578125, "eval_signal/advantage_abs_mean": 0.21750148758292198, "eval_signal/advantage_pre_scale_abs_mean": 0.21750148758292198, "eval_signal/advantage_pre_scale_std": 0.22778696939349174, "eval_signal/advantage_std": 0.22778696939349174, "eval_signal/brier_reward/centered_abs_mean": 0.17260025814175606, "eval_signal/brier_reward/group_bin_occupancy": 0.8671875, "eval_signal/brier_reward/group_std_mean": 0.2234898954629898, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01726002711802721, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.01726002711802721, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0348052978515625, "eval_signal/confidence_uniqueness_reward/group_bin_occupancy": 0.34375, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.04058399423956871, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0034805297618731856, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0034805297618731856, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_bin_occupancy": 0.125, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0038759367307648063, "eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.7421875, "eval_signal/frontier_aurc_reward/group_std_mean": 0.006893252138979733, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.844920840696432e-05, "eval_signal/frontier_aurc_reward/weight": 0.012500000186264515, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.844920840696432e-05, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.34364357590675354, "eval_signal/frontier_coverage_0/group_bin_occupancy": 0.96875, "eval_signal/frontier_coverage_0/group_std_mean": 0.41618141531944275, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.004295544931665063, "eval_signal/frontier_coverage_0/weight": 0.012500000186264515, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004295544931665063, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.34364357590675354, "eval_signal/frontier_coverage_1/group_bin_occupancy": 0.96875, "eval_signal/frontier_coverage_1/group_std_mean": 0.41618141531944275, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004295544931665063, "eval_signal/frontier_coverage_1/weight": 0.012500000186264515, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004295544931665063, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.3414214551448822, "eval_signal/frontier_coverage_10/group_bin_occupancy": 0.96875, "eval_signal/frontier_coverage_10/group_std_mean": 0.4136466532945633, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004267768119461834, "eval_signal/frontier_coverage_10/weight": 0.012500000186264515, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004267768119461834, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.3166361153125763, "eval_signal/frontier_coverage_15/group_bin_occupancy": 0.96875, "eval_signal/frontier_coverage_15/group_std_mean": 0.3851661831140518, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003957951499614865, "eval_signal/frontier_coverage_15/weight": 0.012500000186264515, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003957951499614865, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.19747909903526306, "eval_signal/frontier_coverage_20/group_bin_occupancy": 0.9140625, "eval_signal/frontier_coverage_20/group_std_mean": 0.24594665691256523, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0024684888776391745, "eval_signal/frontier_coverage_20/weight": 0.012500000186264515, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0024684888776391745, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.08342637866735458, "eval_signal/frontier_coverage_25/group_bin_occupancy": 0.9375, "eval_signal/frontier_coverage_25/group_std_mean": 0.10634090937674046, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010428297682665288, "eval_signal/frontier_coverage_25/weight": 0.012500000186264515, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010428297682665288, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.3414214551448822, "eval_signal/frontier_coverage_5/group_bin_occupancy": 0.96875, "eval_signal/frontier_coverage_5/group_std_mean": 0.4136466532945633, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004267768119461834, "eval_signal/frontier_coverage_5/weight": 0.012500000186264515, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004267768119461834, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.00797420903109014, "eval_signal/frontier_ece_reward/group_bin_occupancy": 0.90625, "eval_signal/frontier_ece_reward/group_std_mean": 0.011368014384061098, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007974208710948005, "eval_signal/frontier_ece_reward/weight": 0.10000000149011612, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007974208710948005, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.125, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0, "eval_steps_per_second": 0.189, "step": 250 }, { "calibration/aurc": 0.23524356922838585, "calibration/batch_distribution_entropy": 0.9748226569066292, "calibration/batch_entropy_100bins": 0.9464532567711041, "calibration/batch_entropy_10bins": 0.9748226569066292, "calibration/batch_entropy_50bins": 0.9691353813672773, "calibration/batch_uniqueness": 0.9648882276804489, "calibration/buffer_distribution_entropy": 0.9989431393987553, "calibration/buffer_entropy_100bins": 0.9907328547968023, "calibration/buffer_entropy_10bins": 0.9989431393987553, "calibration/buffer_entropy_50bins": 0.9952267925200537, "calibration/confidence_entropy": 0.48285694613510166, "calibration/coverage@0%": 0.0140625, "calibration/coverage@1%": 0.0140625, "calibration/coverage@10%": 0.10546875, "calibration/coverage@15%": 0.2125, "calibration/coverage@20%": 0.3839920193248532, "calibration/coverage@25%": 0.6714377446183952, "calibration/coverage@30%": 0.798828125, "calibration/coverage@5%": 0.03046875, "calibration/ece": 0.13733046909174487, "calibration/mean_confidence": 0.5263182425337444, "calibration/prompt_uniqueness": 0.8637435674915451, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 753.4, "completions/max_terminated_length": 585.2, "completions/mean_length": 182.904296875, "completions/mean_terminated_length": 182.77190551757812, "completions/min_length": 88.8, "completions/min_terminated_length": 88.8, "epoch": 0.816, "grad_norm": 0.001030449173413217, "learning_rate": 1e-06, "loss": 0.0007, "num_tokens": 856774156.0, "reward": 0.9531601071357727, "reward_std": 0.08292276561260223, "rewards/accuracy_reward": 0.59013671875, "rewards/brier_reward": 0.7899926781654358, "rewards/confidence_uniqueness_reward": 0.9667759299278259, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.0026666073594242335, "rewards/frontier_coverage_0": 0.06293640360236168, "rewards/frontier_coverage_1": 0.06293640360236168, "rewards/frontier_coverage_10": 0.06290345415472984, "rewards/frontier_coverage_15": 0.06183330789208412, "rewards/frontier_coverage_20": 0.052926937490701674, "rewards/frontier_coverage_25": 0.04199915751814842, "rewards/frontier_coverage_5": 0.06285227611660957, "rewards/frontier_ece_reward": 0.002211177465505898, "rewards/frontier_entropy_batch_reward": -0.22828937768936158, "signal/accuracy_reward/centered_abs_mean": 0.090277099609375, "signal/accuracy_reward/group_bin_occupancy": 0.16640625, "signal/accuracy_reward/group_std_mean": 0.11830563694238663, "signal/accuracy_reward/group_zero_std_frac": 0.66875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0451385498046875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0451385498046875, "signal/advantage_abs_mean": 0.06491014659404755, "signal/advantage_pre_scale_abs_mean": 0.06491014659404755, "signal/advantage_pre_scale_std": 0.1037605032324791, "signal/advantage_std": 0.1037605032324791, "signal/brier_reward/centered_abs_mean": 0.11094661056995392, "signal/brier_reward/group_bin_occupancy": 0.86171875, "signal/brier_reward/group_std_mean": 0.1411285489797592, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011094661056995391, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011094661056995391, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012302939221262932, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.869140625, "signal/confidence_uniqueness_reward/group_std_mean": 0.01582129541784525, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012302939547225833, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012302939547225833, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_bin_occupancy": 0.125390625, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.002399337338283658, "signal/frontier_aurc_reward/group_bin_occupancy": 0.738671875, "signal/frontier_aurc_reward/group_std_mean": 0.0037662754766643047, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.9991718110977673e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.9991718110977673e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.1482946664094925, "signal/frontier_coverage_0/group_bin_occupancy": 0.869921875, "signal/frontier_coverage_0/group_std_mean": 0.19012218713760376, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001853683264926076, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001853683264926076, "signal/frontier_coverage_1/centered_abs_mean": 0.1482946664094925, "signal/frontier_coverage_1/group_bin_occupancy": 0.869921875, "signal/frontier_coverage_1/group_std_mean": 0.19012218713760376, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001853683264926076, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001853683264926076, "signal/frontier_coverage_10/centered_abs_mean": 0.14743364751338958, "signal/frontier_coverage_10/group_bin_occupancy": 0.86953125, "signal/frontier_coverage_10/group_std_mean": 0.18905034363269807, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018429205985739828, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018429205985739828, "signal/frontier_coverage_15/centered_abs_mean": 0.13868501037359238, "signal/frontier_coverage_15/group_bin_occupancy": 0.86484375, "signal/frontier_coverage_15/group_std_mean": 0.17817612886428832, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0017335626529529692, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0017335626529529692, "signal/frontier_coverage_20/centered_abs_mean": 0.08847524970769882, "signal/frontier_coverage_20/group_bin_occupancy": 0.876953125, "signal/frontier_coverage_20/group_std_mean": 0.11435707211494446, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0011059406446292997, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011059406446292997, "signal/frontier_coverage_25/centered_abs_mean": 0.05190168023109436, "signal/frontier_coverage_25/group_bin_occupancy": 0.920703125, "signal/frontier_coverage_25/group_std_mean": 0.06661412790417671, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006487710168585181, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006487710168585181, "signal/frontier_coverage_5/centered_abs_mean": 0.1475912719964981, "signal/frontier_coverage_5/group_bin_occupancy": 0.86953125, "signal/frontier_coverage_5/group_std_mean": 0.18924154639244078, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018448908813297749, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018448908813297749, "signal/frontier_ece_reward/centered_abs_mean": 0.006282018590718507, "signal/frontier_ece_reward/group_bin_occupancy": 0.843359375, "signal/frontier_ece_reward/group_std_mean": 0.009374569542706013, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006282018381170929, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006282018381170929, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.29568083882331847, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.717578125, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3687551856040955, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.029568084701895713, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.029568084701895713, "step": 255 }, { "calibration/aurc": 0.2818856091214798, "calibration/batch_distribution_entropy": 0.9755369371648703, "calibration/batch_entropy_100bins": 0.9477719023713969, "calibration/batch_entropy_10bins": 0.9755369371648703, "calibration/batch_entropy_50bins": 0.9714799065497696, "calibration/batch_uniqueness": 0.9651092529296875, "calibration/buffer_distribution_entropy": 0.9988617474514427, "calibration/buffer_entropy_100bins": 0.9901376392375262, "calibration/buffer_entropy_10bins": 0.9988617474514427, "calibration/buffer_entropy_50bins": 0.9950786388567818, "calibration/confidence_entropy": 0.49679534425027755, "calibration/coverage@0%": 0.039453125, "calibration/coverage@1%": 0.039453125, "calibration/coverage@10%": 0.203125, "calibration/coverage@15%": 0.248046875, "calibration/coverage@20%": 0.32890625, "calibration/coverage@25%": 0.432421875, "calibration/coverage@30%": 0.51171875, "calibration/coverage@5%": 0.160546875, "calibration/ece": 0.11443988386113282, "calibration/mean_confidence": 0.4846168869722266, "calibration/prompt_uniqueness": 0.867236328125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 855.6, "completions/max_terminated_length": 669.8, "completions/mean_length": 183.81455078125, "completions/mean_terminated_length": 183.682763671875, "completions/min_length": 81.4, "completions/min_terminated_length": 81.4, "epoch": 0.832, "grad_norm": 0.0009330803877674043, "learning_rate": 1e-06, "loss": 0.0006, "num_tokens": 873664769.0, "reward": 0.9404654026031494, "reward_std": 0.08173245638608932, "rewards/accuracy_reward": 0.55751953125, "rewards/brier_reward": 0.8090359687805175, "rewards/confidence_uniqueness_reward": 0.9651769518852233, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.002471396827604622, "rewards/frontier_coverage_0": 0.11028219759464264, "rewards/frontier_coverage_1": 0.11028219759464264, "rewards/frontier_coverage_10": 0.10962048023939133, "rewards/frontier_coverage_15": 0.10224549621343612, "rewards/frontier_coverage_20": 0.07180028259754181, "rewards/frontier_coverage_25": 0.053031648695468905, "rewards/frontier_coverage_5": 0.10962048023939133, "rewards/frontier_ece_reward": 0.0031542435754090548, "rewards/frontier_entropy_batch_reward": -0.24287400245666504, "signal/accuracy_reward/centered_abs_mean": 0.090777587890625, "signal/accuracy_reward/group_bin_occupancy": 0.167578125, "signal/accuracy_reward/group_std_mean": 0.11992976069450378, "signal/accuracy_reward/group_zero_std_frac": 0.659375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0453887939453125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0453887939453125, "signal/advantage_abs_mean": 0.06406652480363846, "signal/advantage_pre_scale_abs_mean": 0.06406652480363846, "signal/advantage_pre_scale_std": 0.10220163762569427, "signal/advantage_std": 0.10220163762569427, "signal/brier_reward/centered_abs_mean": 0.10323716104030609, "signal/brier_reward/group_bin_occupancy": 0.856640625, "signal/brier_reward/group_std_mean": 0.1322301909327507, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010323716327548027, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010323716327548027, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012594187259674072, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.855078125, "signal/confidence_uniqueness_reward/group_std_mean": 0.016454468481242657, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001259418693371117, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001259418693371117, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_bin_occupancy": 0.125390625, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.002055089036002755, "signal/frontier_aurc_reward/group_bin_occupancy": 0.741796875, "signal/frontier_aurc_reward/group_std_mean": 0.003259465633891523, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.5688614914542996e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.5688614914542996e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.14981609880924224, "signal/frontier_coverage_0/group_bin_occupancy": 0.86796875, "signal/frontier_coverage_0/group_std_mean": 0.19165619909763337, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0018727012909948825, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0018727012909948825, "signal/frontier_coverage_1/centered_abs_mean": 0.14981609880924224, "signal/frontier_coverage_1/group_bin_occupancy": 0.86796875, "signal/frontier_coverage_1/group_std_mean": 0.19165619909763337, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0018727012909948825, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0018727012909948825, "signal/frontier_coverage_10/centered_abs_mean": 0.14883655905723572, "signal/frontier_coverage_10/group_bin_occupancy": 0.86875, "signal/frontier_coverage_10/group_std_mean": 0.19037957787513732, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001860457076691091, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001860457076691091, "signal/frontier_coverage_15/centered_abs_mean": 0.13789782375097276, "signal/frontier_coverage_15/group_bin_occupancy": 0.86875, "signal/frontier_coverage_15/group_std_mean": 0.17621307969093322, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0017237228574231267, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0017237228574231267, "signal/frontier_coverage_20/centered_abs_mean": 0.08962543904781342, "signal/frontier_coverage_20/group_bin_occupancy": 0.87890625, "signal/frontier_coverage_20/group_std_mean": 0.11485566943883896, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0011203179834410547, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011203179834410547, "signal/frontier_coverage_25/centered_abs_mean": 0.052447068691253665, "signal/frontier_coverage_25/group_bin_occupancy": 0.9203125, "signal/frontier_coverage_25/group_std_mean": 0.06672648042440414, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006555883679538965, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006555883679538965, "signal/frontier_coverage_5/centered_abs_mean": 0.14883655905723572, "signal/frontier_coverage_5/group_bin_occupancy": 0.86875, "signal/frontier_coverage_5/group_std_mean": 0.19037957787513732, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001860457076691091, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001860457076691091, "signal/frontier_ece_reward/centered_abs_mean": 0.006253997515887022, "signal/frontier_ece_reward/group_bin_occupancy": 0.85625, "signal/frontier_ece_reward/group_std_mean": 0.009096156992018222, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006253997562453151, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006253997562453151, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.290888249874115, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.71484375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3610431671142578, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.029088825359940527, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.029088825359940527, "step": 260 }, { "calibration/aurc": 0.3199682212623704, "calibration/batch_distribution_entropy": 0.9698417146444305, "calibration/batch_entropy_100bins": 0.9435716120808323, "calibration/batch_entropy_10bins": 0.9698417146444305, "calibration/batch_entropy_50bins": 0.9666622314375889, "calibration/batch_uniqueness": 0.96644287109375, "calibration/buffer_distribution_entropy": 0.9989431015829364, "calibration/buffer_entropy_100bins": 0.9895180210867552, "calibration/buffer_entropy_10bins": 0.9989431015829364, "calibration/buffer_entropy_50bins": 0.9950208033407677, "calibration/confidence_entropy": 0.4952000686126718, "calibration/coverage@0%": 0.037109375, "calibration/coverage@1%": 0.04140625, "calibration/coverage@10%": 0.1703125, "calibration/coverage@15%": 0.258984375, "calibration/coverage@20%": 0.419140625, "calibration/coverage@25%": 0.47734375, "calibration/coverage@30%": 0.519921875, "calibration/coverage@5%": 0.082421875, "calibration/ece": 0.1614252955334596, "calibration/mean_confidence": 0.5468318580334597, "calibration/prompt_uniqueness": 0.86171875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 783.8, "completions/max_terminated_length": 615.2, "completions/mean_length": 182.1521484375, "completions/mean_terminated_length": 182.01974487304688, "completions/min_length": 84.8, "completions/min_terminated_length": 84.8, "epoch": 0.848, "grad_norm": 0.0010821467731148005, "learning_rate": 1e-06, "loss": 0.0011, "num_tokens": 890544375.0, "reward": 0.930538809299469, "reward_std": 0.08106714338064194, "rewards/accuracy_reward": 0.5341796875, "rewards/brier_reward": 0.8012025237083436, "rewards/confidence_uniqueness_reward": 0.9661448240280152, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.003271967126056552, "rewards/frontier_coverage_0": 0.11554919332265853, "rewards/frontier_coverage_1": 0.11554919332265853, "rewards/frontier_coverage_10": 0.11485711932182312, "rewards/frontier_coverage_15": 0.11126702874898911, "rewards/frontier_coverage_20": 0.0689346432685852, "rewards/frontier_coverage_25": 0.04922807216644287, "rewards/frontier_coverage_5": 0.11537581384181976, "rewards/frontier_ece_reward": 0.0030869925394654274, "rewards/frontier_entropy_batch_reward": -0.22139265537261962, "signal/accuracy_reward/centered_abs_mean": 0.08153076171875, "signal/accuracy_reward/group_bin_occupancy": 0.168359375, "signal/accuracy_reward/group_std_mean": 0.11394334435462952, "signal/accuracy_reward/group_zero_std_frac": 0.653125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.040765380859375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.040765380859375, "signal/advantage_abs_mean": 0.061507892608642575, "signal/advantage_pre_scale_abs_mean": 0.061507892608642575, "signal/advantage_pre_scale_std": 0.10090996772050857, "signal/advantage_std": 0.10090996772050857, "signal/brier_reward/centered_abs_mean": 0.10592394173145295, "signal/brier_reward/group_bin_occupancy": 0.841796875, "signal/brier_reward/group_std_mean": 0.13790196180343628, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01059239376336336, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01059239376336336, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012609278596937657, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.865234375, "signal/confidence_uniqueness_reward/group_std_mean": 0.016177338361740113, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012609278550371529, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012609278550371529, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_bin_occupancy": 0.125390625, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.0029984854627400637, "signal/frontier_aurc_reward/group_bin_occupancy": 0.721875, "signal/frontier_aurc_reward/group_std_mean": 0.0048636754509061575, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.7481067192857156e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.7481067192857156e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.1415121629834175, "signal/frontier_coverage_0/group_bin_occupancy": 0.85234375, "signal/frontier_coverage_0/group_std_mean": 0.18623048067092896, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0017689020838588475, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0017689020838588475, "signal/frontier_coverage_1/centered_abs_mean": 0.1415121629834175, "signal/frontier_coverage_1/group_bin_occupancy": 0.85234375, "signal/frontier_coverage_1/group_std_mean": 0.18623048067092896, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0017689020838588475, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0017689020838588475, "signal/frontier_coverage_10/centered_abs_mean": 0.14053474068641664, "signal/frontier_coverage_10/group_bin_occupancy": 0.851171875, "signal/frontier_coverage_10/group_std_mean": 0.18495951294898988, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0017566842725500464, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017566842725500464, "signal/frontier_coverage_15/centered_abs_mean": 0.1342177927494049, "signal/frontier_coverage_15/group_bin_occupancy": 0.85, "signal/frontier_coverage_15/group_std_mean": 0.17671539783477783, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016777224140241743, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016777224140241743, "signal/frontier_coverage_20/centered_abs_mean": 0.08487182259559631, "signal/frontier_coverage_20/group_bin_occupancy": 0.88359375, "signal/frontier_coverage_20/group_std_mean": 0.11134538352489472, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010608977987430989, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010608977987430989, "signal/frontier_coverage_25/centered_abs_mean": 0.05198915079236031, "signal/frontier_coverage_25/group_bin_occupancy": 0.91640625, "signal/frontier_coverage_25/group_std_mean": 0.06686145663261414, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.000649864412844181, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.000649864412844181, "signal/frontier_coverage_5/centered_abs_mean": 0.14084831327199937, "signal/frontier_coverage_5/group_bin_occupancy": 0.851171875, "signal/frontier_coverage_5/group_std_mean": 0.18535879552364348, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0017606039065867663, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0017606039065867663, "signal/frontier_ece_reward/centered_abs_mean": 0.0055714274756610395, "signal/frontier_ece_reward/group_bin_occupancy": 0.877734375, "signal/frontier_ece_reward/group_std_mean": 0.007441604882478714, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005571427405811846, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005571427405811846, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2792421877384186, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.71484375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.35623074769973756, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.027924218401312827, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027924218401312827, "step": 265 }, { "calibration/aurc": 0.26688358629600345, "calibration/batch_distribution_entropy": 0.956438995609551, "calibration/batch_entropy_100bins": 0.9302209820641183, "calibration/batch_entropy_10bins": 0.956438995609551, "calibration/batch_entropy_50bins": 0.9543006144858946, "calibration/batch_uniqueness": 0.96548082464166, "calibration/buffer_distribution_entropy": 0.9990004575823258, "calibration/buffer_entropy_100bins": 0.9889268859990621, "calibration/buffer_entropy_10bins": 0.9990004575823258, "calibration/buffer_entropy_50bins": 0.99498162743099, "calibration/confidence_entropy": 0.48691284355710457, "calibration/coverage@0%": 0.01328125, "calibration/coverage@1%": 0.01328125, "calibration/coverage@10%": 0.168359375, "calibration/coverage@15%": 0.240234375, "calibration/coverage@20%": 0.323828125, "calibration/coverage@25%": 0.4125, "calibration/coverage@30%": 0.5446076932485322, "calibration/coverage@5%": 0.084375, "calibration/ece": 0.1455257910339714, "calibration/mean_confidence": 0.6091981775937194, "calibration/prompt_uniqueness": 0.8710765763202393, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 1098.6, "completions/max_terminated_length": 918.8, "completions/mean_length": 181.7109375, "completions/mean_terminated_length": 181.44696960449218, "completions/min_length": 86.0, "completions/min_terminated_length": 86.0, "epoch": 0.864, "grad_norm": 0.0010378322331234813, "learning_rate": 1e-06, "loss": 0.0009, "num_tokens": 907391911.0, "reward": 0.9530243515968323, "reward_std": 0.08318499326705933, "rewards/accuracy_reward": 0.59248046875, "rewards/brier_reward": 0.7973593950271607, "rewards/confidence_uniqueness_reward": 0.9641671776771545, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.002714644838124514, "rewards/frontier_coverage_0": 0.07732260525226593, "rewards/frontier_coverage_1": 0.07732260525226593, "rewards/frontier_coverage_10": 0.07700667977333069, "rewards/frontier_coverage_15": 0.0751921996474266, "rewards/frontier_coverage_20": 0.056314506381750104, "rewards/frontier_coverage_25": 0.05060553103685379, "rewards/frontier_coverage_5": 0.0774739071726799, "rewards/frontier_ece_reward": 0.002765231346711516, "rewards/frontier_entropy_batch_reward": -0.2565395474433899, "signal/accuracy_reward/centered_abs_mean": 0.090386962890625, "signal/accuracy_reward/group_bin_occupancy": 0.166796875, "signal/accuracy_reward/group_std_mean": 0.11810308396816253, "signal/accuracy_reward/group_zero_std_frac": 0.665625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0451934814453125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0451934814453125, "signal/advantage_abs_mean": 0.0654950737953186, "signal/advantage_pre_scale_abs_mean": 0.0654950737953186, "signal/advantage_pre_scale_std": 0.10269584357738495, "signal/advantage_std": 0.10269584357738495, "signal/brier_reward/centered_abs_mean": 0.10956264287233353, "signal/brier_reward/group_bin_occupancy": 0.851171875, "signal/brier_reward/group_std_mean": 0.1409148782491684, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010956264473497868, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010956264473497868, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.014184213988482953, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.834765625, "signal/confidence_uniqueness_reward/group_std_mean": 0.018567436560988426, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0014184214174747466, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014184214174747466, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_bin_occupancy": 0.12578125, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0027129411697387694, "signal/frontier_aurc_reward/group_bin_occupancy": 0.714453125, "signal/frontier_aurc_reward/group_std_mean": 0.0043897018767893314, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.391176614968572e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.391176614968572e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.1481944888830185, "signal/frontier_coverage_0/group_bin_occupancy": 0.8671875, "signal/frontier_coverage_0/group_std_mean": 0.18974127769470214, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0018524311250075697, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0018524311250075697, "signal/frontier_coverage_1/centered_abs_mean": 0.1481944888830185, "signal/frontier_coverage_1/group_bin_occupancy": 0.8671875, "signal/frontier_coverage_1/group_std_mean": 0.18974127769470214, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0018524311250075697, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0018524311250075697, "signal/frontier_coverage_10/centered_abs_mean": 0.14702675938606263, "signal/frontier_coverage_10/group_bin_occupancy": 0.865234375, "signal/frontier_coverage_10/group_std_mean": 0.1882396310567856, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018378345994278789, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018378345994278789, "signal/frontier_coverage_15/centered_abs_mean": 0.14057752192020417, "signal/frontier_coverage_15/group_bin_occupancy": 0.858203125, "signal/frontier_coverage_15/group_std_mean": 0.1800085186958313, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0017572190146893263, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0017572190146893263, "signal/frontier_coverage_20/centered_abs_mean": 0.08514007031917573, "signal/frontier_coverage_20/group_bin_occupancy": 0.8859375, "signal/frontier_coverage_20/group_std_mean": 0.10977080911397934, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010642508743330837, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010642508743330837, "signal/frontier_coverage_25/centered_abs_mean": 0.05421077758073807, "signal/frontier_coverage_25/group_bin_occupancy": 0.925, "signal/frontier_coverage_25/group_std_mean": 0.06963766515254974, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006776347407139837, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006776347407139837, "signal/frontier_coverage_5/centered_abs_mean": 0.1480186551809311, "signal/frontier_coverage_5/group_bin_occupancy": 0.866015625, "signal/frontier_coverage_5/group_std_mean": 0.18951984047889708, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018502332037314772, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018502332037314772, "signal/frontier_ece_reward/centered_abs_mean": 0.005847407225519419, "signal/frontier_ece_reward/group_bin_occupancy": 0.850390625, "signal/frontier_ece_reward/group_std_mean": 0.007833207491785288, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005847407272085547, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005847407272085547, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3008589863777161, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.715625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.371851509809494, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03008589893579483, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03008589893579483, "step": 270 }, { "calibration/aurc": 0.3599908256968588, "calibration/batch_distribution_entropy": 0.9765293546573472, "calibration/batch_entropy_100bins": 0.9479550555184086, "calibration/batch_entropy_10bins": 0.9765293546573472, "calibration/batch_entropy_50bins": 0.9718896673551072, "calibration/batch_uniqueness": 0.9620710457371701, "calibration/buffer_distribution_entropy": 0.9989072670831562, "calibration/buffer_entropy_100bins": 0.9881814276419251, "calibration/buffer_entropy_10bins": 0.9989072670831562, "calibration/buffer_entropy_50bins": 0.9947699849601171, "calibration/confidence_entropy": 0.4855467010850557, "calibration/coverage@0%": 0.016816750244618393, "calibration/coverage@1%": 0.016816750244618393, "calibration/coverage@10%": 0.02775807240704501, "calibration/coverage@15%": 0.06486897627201565, "calibration/coverage@20%": 0.1316910775440313, "calibration/coverage@25%": 0.2810864114481409, "calibration/coverage@30%": 0.3869679549902153, "calibration/coverage@5%": 0.016816750244618393, "calibration/ece": 0.13908466566091793, "calibration/mean_confidence": 0.47106601660332437, "calibration/prompt_uniqueness": 0.851669403616025, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 730.0, "completions/max_terminated_length": 618.6, "completions/mean_length": 175.8171875, "completions/mean_terminated_length": 175.41910095214843, "completions/min_length": 77.6, "completions/min_terminated_length": 77.6, "epoch": 0.88, "grad_norm": 0.0011793546145781875, "learning_rate": 1e-06, "loss": 0.0009, "num_tokens": 924339351.0, "reward": 0.9199577450752259, "reward_std": 0.08229250609874725, "rewards/accuracy_reward": 0.5189453125, "rewards/brier_reward": 0.7955085277557373, "rewards/confidence_uniqueness_reward": 0.9621264696121216, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.002559547405689955, "rewards/frontier_coverage_0": 0.12974209040403367, "rewards/frontier_coverage_1": 0.12974209040403367, "rewards/frontier_coverage_10": 0.12853662222623824, "rewards/frontier_coverage_15": 0.12217865586280822, "rewards/frontier_coverage_20": 0.0753389410674572, "rewards/frontier_coverage_25": 0.05095566734671593, "rewards/frontier_coverage_5": 0.12889423370361328, "rewards/frontier_ece_reward": 0.0026560436934232714, "rewards/frontier_entropy_batch_reward": -0.24932879209518433, "signal/accuracy_reward/centered_abs_mean": 0.09031982421875, "signal/accuracy_reward/group_bin_occupancy": 0.16640625, "signal/accuracy_reward/group_std_mean": 0.11694404035806656, "signal/accuracy_reward/group_zero_std_frac": 0.66875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.045159912109375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.045159912109375, "signal/advantage_abs_mean": 0.06529273688793183, "signal/advantage_pre_scale_abs_mean": 0.06529273688793183, "signal/advantage_pre_scale_std": 0.102424056828022, "signal/advantage_std": 0.102424056828022, "signal/brier_reward/centered_abs_mean": 0.1097784698009491, "signal/brier_reward/group_bin_occupancy": 0.84375, "signal/brier_reward/group_std_mean": 0.14118833541870118, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010977847129106521, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010977847129106521, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.014931019768118859, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.850390625, "signal/confidence_uniqueness_reward/group_std_mean": 0.019864151254296303, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0014931020326912404, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014931020326912404, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_bin_occupancy": 0.126171875, "signal/format_reward/group_std_mean": 0.0016572814434766769, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.0021499829599633813, "signal/frontier_aurc_reward/group_bin_occupancy": 0.725390625, "signal/frontier_aurc_reward/group_std_mean": 0.0034981849137693645, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.6874786999542267e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.6874786999542267e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.162190243601799, "signal/frontier_coverage_0/group_bin_occupancy": 0.872265625, "signal/frontier_coverage_0/group_std_mean": 0.2070352703332901, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0020273780450224877, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0020273780450224877, "signal/frontier_coverage_1/centered_abs_mean": 0.162190243601799, "signal/frontier_coverage_1/group_bin_occupancy": 0.872265625, "signal/frontier_coverage_1/group_std_mean": 0.2070352703332901, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0020273780450224877, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020273780450224877, "signal/frontier_coverage_10/centered_abs_mean": 0.16074737310409545, "signal/frontier_coverage_10/group_bin_occupancy": 0.872265625, "signal/frontier_coverage_10/group_std_mean": 0.2051818400621414, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002009342284873128, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002009342284873128, "signal/frontier_coverage_15/centered_abs_mean": 0.15363400876522065, "signal/frontier_coverage_15/group_bin_occupancy": 0.86875, "signal/frontier_coverage_15/group_std_mean": 0.19600152373313903, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0019204251701012253, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0019204251701012253, "signal/frontier_coverage_20/centered_abs_mean": 0.09273725599050522, "signal/frontier_coverage_20/group_bin_occupancy": 0.88359375, "signal/frontier_coverage_20/group_std_mean": 0.1185536801815033, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0011592157417908311, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011592157417908311, "signal/frontier_coverage_25/centered_abs_mean": 0.053829978406429294, "signal/frontier_coverage_25/group_bin_occupancy": 0.9140625, "signal/frontier_coverage_25/group_std_mean": 0.06915899068117141, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006728747393935919, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006728747393935919, "signal/frontier_coverage_5/centered_abs_mean": 0.16112555861473082, "signal/frontier_coverage_5/group_bin_occupancy": 0.872265625, "signal/frontier_coverage_5/group_std_mean": 0.2056680828332901, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00201406953856349, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00201406953856349, "signal/frontier_ece_reward/centered_abs_mean": 0.00555073544383049, "signal/frontier_ece_reward/group_bin_occupancy": 0.85859375, "signal/frontier_ece_reward/group_std_mean": 0.007629283983260393, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005550735630095005, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005550735630095005, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2958798289299011, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.71875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.37051703929901125, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.029587984085083008, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.029587984085083008, "step": 275 }, { "calibration/aurc": 0.3397437648873166, "calibration/batch_distribution_entropy": 0.9796197507037563, "calibration/batch_entropy_100bins": 0.951087428797685, "calibration/batch_entropy_10bins": 0.9796197507037563, "calibration/batch_entropy_50bins": 0.9718575724598498, "calibration/batch_uniqueness": 0.965020751953125, "calibration/buffer_distribution_entropy": 0.9988991354973707, "calibration/buffer_entropy_100bins": 0.9878559564647965, "calibration/buffer_entropy_10bins": 0.9988991354973707, "calibration/buffer_entropy_50bins": 0.9948360874758867, "calibration/confidence_entropy": 0.49223803297364005, "calibration/coverage@0%": 0.027734375, "calibration/coverage@1%": 0.027734375, "calibration/coverage@10%": 0.08359375, "calibration/coverage@15%": 0.121875, "calibration/coverage@20%": 0.1984375, "calibration/coverage@25%": 0.38515625, "calibration/coverage@30%": 0.4953125, "calibration/coverage@5%": 0.05703125, "calibration/ece": 0.13973601884101564, "calibration/mean_confidence": 0.4944046061589843, "calibration/prompt_uniqueness": 0.854248046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 751.2, "completions/max_terminated_length": 593.2, "completions/mean_length": 174.5083984375, "completions/mean_terminated_length": 174.11077270507812, "completions/min_length": 81.2, "completions/min_terminated_length": 81.2, "epoch": 0.896, "grad_norm": 0.0028364313766360283, "learning_rate": 1e-06, "loss": 0.001, "num_tokens": 941237165.0, "reward": 0.935793137550354, "reward_std": 0.07484155595302582, "rewards/accuracy_reward": 0.54541015625, "rewards/brier_reward": 0.8021585941314697, "rewards/confidence_uniqueness_reward": 0.9650752425193787, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.0026960095157846807, "rewards/frontier_coverage_0": 0.11437956839799882, "rewards/frontier_coverage_1": 0.11437956839799882, "rewards/frontier_coverage_10": 0.11248253881931305, "rewards/frontier_coverage_15": 0.10827968120574952, "rewards/frontier_coverage_20": 0.07057406008243561, "rewards/frontier_coverage_25": 0.05096975192427635, "rewards/frontier_coverage_5": 0.11267611980438233, "rewards/frontier_ece_reward": 0.0022103100549429656, "rewards/frontier_entropy_batch_reward": -0.2222293496131897, "signal/accuracy_reward/centered_abs_mean": 0.076336669921875, "signal/accuracy_reward/group_bin_occupancy": 0.166015625, "signal/accuracy_reward/group_std_mean": 0.10640045404434204, "signal/accuracy_reward/group_zero_std_frac": 0.671875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0381683349609375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0381683349609375, "signal/advantage_abs_mean": 0.05686543136835098, "signal/advantage_pre_scale_abs_mean": 0.05686543136835098, "signal/advantage_pre_scale_std": 0.09319915175437928, "signal/advantage_std": 0.09319915175437928, "signal/brier_reward/centered_abs_mean": 0.1045459121465683, "signal/brier_reward/group_bin_occupancy": 0.85078125, "signal/brier_reward/group_std_mean": 0.13514408171176912, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010454590804874897, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010454590804874897, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012861154228448867, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.86171875, "signal/confidence_uniqueness_reward/group_std_mean": 0.017182295396924018, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012861154275014997, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012861154275014997, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_bin_occupancy": 0.126171875, "signal/format_reward/group_std_mean": 0.0016572814434766769, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.0021444797981530427, "signal/frontier_aurc_reward/group_bin_occupancy": 0.721484375, "signal/frontier_aurc_reward/group_std_mean": 0.0034413845278322697, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.6805997185874732e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.6805997185874732e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.15001226961612701, "signal/frontier_coverage_0/group_bin_occupancy": 0.8703125, "signal/frontier_coverage_0/group_std_mean": 0.19234943985939026, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001875153393484652, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001875153393484652, "signal/frontier_coverage_1/centered_abs_mean": 0.15001226961612701, "signal/frontier_coverage_1/group_bin_occupancy": 0.8703125, "signal/frontier_coverage_1/group_std_mean": 0.19234943985939026, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001875153393484652, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001875153393484652, "signal/frontier_coverage_10/centered_abs_mean": 0.14806943833827974, "signal/frontier_coverage_10/group_bin_occupancy": 0.867578125, "signal/frontier_coverage_10/group_std_mean": 0.18986817002296447, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018508680164813994, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018508680164813994, "signal/frontier_coverage_15/centered_abs_mean": 0.13712047040462494, "signal/frontier_coverage_15/group_bin_occupancy": 0.87109375, "signal/frontier_coverage_15/group_std_mean": 0.17578884959220886, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0017140058567747473, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0017140058567747473, "signal/frontier_coverage_20/centered_abs_mean": 0.08517580181360244, "signal/frontier_coverage_20/group_bin_occupancy": 0.88984375, "signal/frontier_coverage_20/group_std_mean": 0.10908302515745164, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010646975366398691, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010646975366398691, "signal/frontier_coverage_25/centered_abs_mean": 0.05108058974146843, "signal/frontier_coverage_25/group_bin_occupancy": 0.910546875, "signal/frontier_coverage_25/group_std_mean": 0.06581792756915092, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006385074113495648, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006385074113495648, "signal/frontier_coverage_5/centered_abs_mean": 0.14839180409908295, "signal/frontier_coverage_5/group_bin_occupancy": 0.867578125, "signal/frontier_coverage_5/group_std_mean": 0.1902903586626053, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001854897616431117, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001854897616431117, "signal/frontier_ece_reward/centered_abs_mean": 0.005655341129750013, "signal/frontier_ece_reward/group_bin_occupancy": 0.843359375, "signal/frontier_ece_reward/group_std_mean": 0.008241251390427352, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005655340966768563, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005655340966768563, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.28620743155479433, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.707421875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.35727530121803286, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02862074300646782, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02862074300646782, "step": 280 }, { "calibration/aurc": 0.34525926666499224, "calibration/batch_distribution_entropy": 0.9790175360990441, "calibration/batch_entropy_100bins": 0.9482314322026811, "calibration/batch_entropy_10bins": 0.9790175360990441, "calibration/batch_entropy_50bins": 0.9722607174037909, "calibration/batch_uniqueness": 0.9663451804952441, "calibration/buffer_distribution_entropy": 0.9988060335454433, "calibration/buffer_entropy_100bins": 0.9873172216290733, "calibration/buffer_entropy_10bins": 0.9988060335454433, "calibration/buffer_entropy_50bins": 0.9947503690244401, "calibration/confidence_entropy": 0.49496733877215143, "calibration/coverage@0%": 0.020322437622309196, "calibration/coverage@1%": 0.020322437622309196, "calibration/coverage@10%": 0.10869159735812133, "calibration/coverage@15%": 0.23450266022504893, "calibration/coverage@20%": 0.3384211411448141, "calibration/coverage@25%": 0.4357211656066536, "calibration/coverage@30%": 0.4935504831213307, "calibration/coverage@5%": 0.06336227984344422, "calibration/ece": 0.1643798130198141, "calibration/mean_confidence": 0.5136691958170254, "calibration/prompt_uniqueness": 0.8665675708084027, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 917.2, "completions/max_terminated_length": 593.2, "completions/mean_length": 174.08798828125, "completions/mean_terminated_length": 173.82220153808595, "completions/min_length": 78.2, "completions/min_terminated_length": 78.2, "epoch": 0.912, "grad_norm": 0.0006290775490924716, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 958071122.0, "reward": 0.9379110097885132, "reward_std": 0.07883523255586625, "rewards/accuracy_reward": 0.55283203125, "rewards/brier_reward": 0.8023535490036011, "rewards/confidence_uniqueness_reward": 0.966447937488556, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.002613516733981669, "rewards/frontier_coverage_0": 0.09809458376839757, "rewards/frontier_coverage_1": 0.09809458376839757, "rewards/frontier_coverage_10": 0.09699874348007143, "rewards/frontier_coverage_15": 0.09137383892666548, "rewards/frontier_coverage_20": 0.06403161454945802, "rewards/frontier_coverage_25": 0.05141137093305588, "rewards/frontier_coverage_5": 0.09733490133658051, "rewards/frontier_ece_reward": 0.0027125254506245255, "rewards/frontier_entropy_batch_reward": -0.22992810904979705, "signal/accuracy_reward/centered_abs_mean": 0.078399658203125, "signal/accuracy_reward/group_bin_occupancy": 0.165234375, "signal/accuracy_reward/group_std_mean": 0.10746027380228043, "signal/accuracy_reward/group_zero_std_frac": 0.678125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0391998291015625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0391998291015625, "signal/advantage_abs_mean": 0.06045843511819839, "signal/advantage_pre_scale_abs_mean": 0.06045843511819839, "signal/advantage_pre_scale_std": 0.0974207267165184, "signal/advantage_std": 0.0974207267165184, "signal/brier_reward/centered_abs_mean": 0.10833604633808136, "signal/brier_reward/group_bin_occupancy": 0.858203125, "signal/brier_reward/group_std_mean": 0.13938207030296326, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010833604633808136, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010833604633808136, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012309185788035392, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.85, "signal/confidence_uniqueness_reward/group_std_mean": 0.016317157819867135, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012309186393395066, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012309186393395066, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_bin_occupancy": 0.12578125, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.002327501564286649, "signal/frontier_aurc_reward/group_bin_occupancy": 0.724609375, "signal/frontier_aurc_reward/group_std_mean": 0.003778242599219084, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.909376962634269e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.909376962634269e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.14588625729084015, "signal/frontier_coverage_0/group_bin_occupancy": 0.869921875, "signal/frontier_coverage_0/group_std_mean": 0.1870903730392456, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0018235782859846949, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0018235782859846949, "signal/frontier_coverage_1/centered_abs_mean": 0.14588625729084015, "signal/frontier_coverage_1/group_bin_occupancy": 0.869921875, "signal/frontier_coverage_1/group_std_mean": 0.1870903730392456, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0018235782859846949, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0018235782859846949, "signal/frontier_coverage_10/centered_abs_mean": 0.14415820091962814, "signal/frontier_coverage_10/group_bin_occupancy": 0.8703125, "signal/frontier_coverage_10/group_std_mean": 0.18487805426120757, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001801977539435029, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001801977539435029, "signal/frontier_coverage_15/centered_abs_mean": 0.13210653364658356, "signal/frontier_coverage_15/group_bin_occupancy": 0.8671875, "signal/frontier_coverage_15/group_std_mean": 0.16943660378456116, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016513317124918104, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016513317124918104, "signal/frontier_coverage_20/centered_abs_mean": 0.08194544017314911, "signal/frontier_coverage_20/group_bin_occupancy": 0.884375, "signal/frontier_coverage_20/group_std_mean": 0.10515519231557846, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001024318009149283, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001024318009149283, "signal/frontier_coverage_25/centered_abs_mean": 0.053516195714473726, "signal/frontier_coverage_25/group_bin_occupancy": 0.92109375, "signal/frontier_coverage_25/group_std_mean": 0.06882122904062271, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006689524743705988, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006689524743705988, "signal/frontier_coverage_5/centered_abs_mean": 0.14475657939910888, "signal/frontier_coverage_5/group_bin_occupancy": 0.86953125, "signal/frontier_coverage_5/group_std_mean": 0.18564701378345488, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018094572937116028, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018094572937116028, "signal/frontier_ece_reward/centered_abs_mean": 0.00575404018163681, "signal/frontier_ece_reward/group_bin_occupancy": 0.846875, "signal/frontier_ece_reward/group_std_mean": 0.008270268887281417, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005754040437750518, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005754040437750518, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2830525994300842, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.72890625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.35513145923614503, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.028305261209607125, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.028305261209607125, "step": 285 }, { "calibration/aurc": 0.4236405616968358, "calibration/batch_distribution_entropy": 0.9818024399334021, "calibration/batch_entropy_100bins": 0.9521496186391095, "calibration/batch_entropy_10bins": 0.9818024399334021, "calibration/batch_entropy_50bins": 0.9757064291499068, "calibration/batch_uniqueness": 0.9665863037109375, "calibration/buffer_distribution_entropy": 0.9987764022593268, "calibration/buffer_entropy_100bins": 0.9868602362750798, "calibration/buffer_entropy_10bins": 0.9987764022593268, "calibration/buffer_entropy_50bins": 0.9947180018038975, "calibration/confidence_entropy": 0.5021496893703811, "calibration/coverage@0%": 0.00234375, "calibration/coverage@1%": 0.00234375, "calibration/coverage@10%": 0.014453125, "calibration/coverage@15%": 0.028515625, "calibration/coverage@20%": 0.050390625, "calibration/coverage@25%": 0.062109375, "calibration/coverage@30%": 0.205078125, "calibration/coverage@5%": 0.00234375, "calibration/ece": 0.13476296875, "calibration/mean_confidence": 0.506945, "calibration/prompt_uniqueness": 0.864794921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 853.6, "completions/max_terminated_length": 853.6, "completions/mean_length": 172.291015625, "completions/mean_terminated_length": 172.291015625, "completions/min_length": 79.8, "completions/min_terminated_length": 79.8, "epoch": 0.928, "grad_norm": 0.0007745189359411597, "learning_rate": 1e-06, "loss": -0.0001, "num_tokens": 974862198.0, "reward": 0.9235079884529114, "reward_std": 0.07905451804399491, "rewards/accuracy_reward": 0.5291015625, "rewards/brier_reward": 0.7884802699089051, "rewards/confidence_uniqueness_reward": 0.9643653869628906, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.0033654853235930205, "rewards/frontier_coverage_0": 0.11306976824998856, "rewards/frontier_coverage_1": 0.11306976824998856, "rewards/frontier_coverage_10": 0.1117068201303482, "rewards/frontier_coverage_15": 0.1046798437833786, "rewards/frontier_coverage_20": 0.07044542729854583, "rewards/frontier_coverage_25": 0.05282995253801346, "rewards/frontier_coverage_5": 0.1121548593044281, "rewards/frontier_ece_reward": 0.002680363832041621, "rewards/frontier_entropy_batch_reward": -0.2502776861190796, "signal/accuracy_reward/centered_abs_mean": 0.07894287109375, "signal/accuracy_reward/group_bin_occupancy": 0.16484375, "signal/accuracy_reward/group_std_mean": 0.10790681540966034, "signal/accuracy_reward/group_zero_std_frac": 0.68125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.039471435546875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.039471435546875, "signal/advantage_abs_mean": 0.06104508712887764, "signal/advantage_pre_scale_abs_mean": 0.06104508712887764, "signal/advantage_pre_scale_std": 0.09825572371482849, "signal/advantage_std": 0.09825572371482849, "signal/brier_reward/centered_abs_mean": 0.11147891283035279, "signal/brier_reward/group_bin_occupancy": 0.84375, "signal/brier_reward/group_std_mean": 0.14375323951244354, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011147891730070114, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011147891730070114, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.013390088081359863, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.87578125, "signal/confidence_uniqueness_reward/group_std_mean": 0.016832890920341015, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013390088919550181, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013390088919550181, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_bin_occupancy": 0.125, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0029725271509960295, "signal/frontier_aurc_reward/group_bin_occupancy": 0.69453125, "signal/frontier_aurc_reward/group_std_mean": 0.004932621866464615, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.7156591133680195e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.7156591133680195e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.146697798371315, "signal/frontier_coverage_0/group_bin_occupancy": 0.873828125, "signal/frontier_coverage_0/group_std_mean": 0.1891067087650299, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0018337224144488573, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0018337224144488573, "signal/frontier_coverage_1/centered_abs_mean": 0.146697798371315, "signal/frontier_coverage_1/group_bin_occupancy": 0.873828125, "signal/frontier_coverage_1/group_std_mean": 0.1891067087650299, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0018337224144488573, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0018337224144488573, "signal/frontier_coverage_10/centered_abs_mean": 0.14494749903678894, "signal/frontier_coverage_10/group_bin_occupancy": 0.875, "signal/frontier_coverage_10/group_std_mean": 0.1868603676557541, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001811843877658248, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001811843877658248, "signal/frontier_coverage_15/centered_abs_mean": 0.13379482328891754, "signal/frontier_coverage_15/group_bin_occupancy": 0.8703125, "signal/frontier_coverage_15/group_std_mean": 0.17280838787555694, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016724353889003396, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016724353889003396, "signal/frontier_coverage_20/centered_abs_mean": 0.0833568200469017, "signal/frontier_coverage_20/group_bin_occupancy": 0.88984375, "signal/frontier_coverage_20/group_std_mean": 0.10767639130353927, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010419602738693356, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010419602738693356, "signal/frontier_coverage_25/centered_abs_mean": 0.05601404085755348, "signal/frontier_coverage_25/group_bin_occupancy": 0.9171875, "signal/frontier_coverage_25/group_std_mean": 0.07191484123468399, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007001755409874022, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007001755409874022, "signal/frontier_coverage_5/centered_abs_mean": 0.1458159238100052, "signal/frontier_coverage_5/group_bin_occupancy": 0.8734375, "signal/frontier_coverage_5/group_std_mean": 0.1879925400018692, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018226990709081293, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018226990709081293, "signal/frontier_ece_reward/centered_abs_mean": 0.005948805715888739, "signal/frontier_ece_reward/group_bin_occupancy": 0.857421875, "signal/frontier_ece_reward/group_std_mean": 0.008423867449164391, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005948805715888739, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005948805715888739, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2958779692649841, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.729296875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3708716452121735, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.029587796702980996, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.029587796702980996, "step": 290 }, { "calibration/aurc": 0.23491377854573506, "calibration/batch_distribution_entropy": 0.9814271545653476, "calibration/batch_entropy_100bins": 0.9529836470801094, "calibration/batch_entropy_10bins": 0.9814271545653476, "calibration/batch_entropy_50bins": 0.9721020286908271, "calibration/batch_uniqueness": 0.9633582912444499, "calibration/buffer_distribution_entropy": 0.9988420216830608, "calibration/buffer_entropy_100bins": 0.9863146646855876, "calibration/buffer_entropy_10bins": 0.9988420216830608, "calibration/buffer_entropy_50bins": 0.9948149826034557, "calibration/confidence_entropy": 0.5013591675015299, "calibration/coverage@0%": 0.03478167808219178, "calibration/coverage@1%": 0.03478167808219178, "calibration/coverage@10%": 0.24629250244618395, "calibration/coverage@15%": 0.3827597541585127, "calibration/coverage@20%": 0.4863770486790607, "calibration/coverage@25%": 0.5665117416829746, "calibration/coverage@30%": 0.6513538099315068, "calibration/coverage@5%": 0.09147810665362036, "calibration/ece": 0.1117759863472358, "calibration/mean_confidence": 0.44929093207252935, "calibration/prompt_uniqueness": 0.8603180280957335, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 853.6, "completions/max_terminated_length": 632.8, "completions/mean_length": 170.640625, "completions/mean_terminated_length": 170.50676574707032, "completions/min_length": 78.0, "completions/min_terminated_length": 78.0, "epoch": 0.944, "grad_norm": 0.0011778445914387703, "learning_rate": 1e-06, "loss": 0.001, "num_tokens": 991584982.0, "reward": 0.9295665860176087, "reward_std": 0.08568341732025146, "rewards/accuracy_reward": 0.54228515625, "rewards/brier_reward": 0.7837172031402588, "rewards/confidence_uniqueness_reward": 0.9617600560188293, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.0024823052808642387, "rewards/frontier_coverage_0": 0.10404116213321686, "rewards/frontier_coverage_1": 0.10404116213321686, "rewards/frontier_coverage_10": 0.10361252054572105, "rewards/frontier_coverage_15": 0.09421005547046661, "rewards/frontier_coverage_20": 0.06851446852087975, "rewards/frontier_coverage_25": 0.04667741134762764, "rewards/frontier_coverage_5": 0.10377772152423859, "rewards/frontier_ece_reward": 0.001886871492024511, "rewards/frontier_entropy_batch_reward": -0.23994633555412292, "signal/accuracy_reward/centered_abs_mean": 0.102056884765625, "signal/accuracy_reward/group_bin_occupancy": 0.172265625, "signal/accuracy_reward/group_std_mean": 0.13405922651290894, "signal/accuracy_reward/group_zero_std_frac": 0.621875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0510284423828125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0510284423828125, "signal/advantage_abs_mean": 0.06657596528530121, "signal/advantage_pre_scale_abs_mean": 0.06657596528530121, "signal/advantage_pre_scale_std": 0.10576380938291549, "signal/advantage_std": 0.10576380938291549, "signal/brier_reward/centered_abs_mean": 0.11052304357290268, "signal/brier_reward/group_bin_occupancy": 0.847265625, "signal/brier_reward/group_std_mean": 0.14318473637104034, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01105230450630188, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01105230450630188, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.014990394562482834, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8671875, "signal/confidence_uniqueness_reward/group_std_mean": 0.019529133662581445, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0014990394469350577, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014990394469350577, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_bin_occupancy": 0.12578125, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0018933590967208148, "signal/frontier_aurc_reward/group_bin_occupancy": 0.727734375, "signal/frontier_aurc_reward/group_std_mean": 0.003138176305219531, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.3666988272452728e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.3666988272452728e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.17136546075344086, "signal/frontier_coverage_0/group_bin_occupancy": 0.8703125, "signal/frontier_coverage_0/group_std_mean": 0.21953192353248596, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002142068138346076, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002142068138346076, "signal/frontier_coverage_1/centered_abs_mean": 0.17136546075344086, "signal/frontier_coverage_1/group_bin_occupancy": 0.8703125, "signal/frontier_coverage_1/group_std_mean": 0.21953192353248596, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002142068138346076, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002142068138346076, "signal/frontier_coverage_10/centered_abs_mean": 0.169466295838356, "signal/frontier_coverage_10/group_bin_occupancy": 0.865625, "signal/frontier_coverage_10/group_std_mean": 0.217143777012825, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0021183287259191274, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0021183287259191274, "signal/frontier_coverage_15/centered_abs_mean": 0.15599824488162994, "signal/frontier_coverage_15/group_bin_occupancy": 0.86640625, "signal/frontier_coverage_15/group_std_mean": 0.19977592229843139, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0019499780144542455, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0019499780144542455, "signal/frontier_coverage_20/centered_abs_mean": 0.09491551518440247, "signal/frontier_coverage_20/group_bin_occupancy": 0.8796875, "signal/frontier_coverage_20/group_std_mean": 0.12218321114778519, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0011864439584314823, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011864439584314823, "signal/frontier_coverage_25/centered_abs_mean": 0.05510400533676148, "signal/frontier_coverage_25/group_bin_occupancy": 0.90078125, "signal/frontier_coverage_25/group_std_mean": 0.07128551304340362, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006888000760227441, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006888000760227441, "signal/frontier_coverage_5/centered_abs_mean": 0.1706299215555191, "signal/frontier_coverage_5/group_bin_occupancy": 0.86875, "signal/frontier_coverage_5/group_std_mean": 0.2185954213142395, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00213287400547415, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00213287400547415, "signal/frontier_ece_reward/centered_abs_mean": 0.005686651263386011, "signal/frontier_ece_reward/group_bin_occupancy": 0.8765625, "signal/frontier_ece_reward/group_std_mean": 0.0076931707561016084, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005686651449650526, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005686651449650526, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.28447132706642153, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.73984375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3533455073833466, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02844713404774666, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02844713404774666, "step": 295 }, { "calibration/aurc": 0.34800169365994993, "calibration/batch_distribution_entropy": 0.9812203477810147, "calibration/batch_entropy_100bins": 0.9538863455386668, "calibration/batch_entropy_10bins": 0.9812203477810147, "calibration/batch_entropy_50bins": 0.9751755655335739, "calibration/batch_uniqueness": 0.9656280517578125, "calibration/buffer_distribution_entropy": 0.9989293186275472, "calibration/buffer_entropy_100bins": 0.9856529750536842, "calibration/buffer_entropy_10bins": 0.9989293186275472, "calibration/buffer_entropy_50bins": 0.9948214021847231, "calibration/confidence_entropy": 0.47452948094860803, "calibration/coverage@0%": 0.0078125, "calibration/coverage@1%": 0.0078125, "calibration/coverage@10%": 0.08203125, "calibration/coverage@15%": 0.226171875, "calibration/coverage@20%": 0.2671875, "calibration/coverage@25%": 0.3203125, "calibration/coverage@30%": 0.391015625, "calibration/coverage@5%": 0.010546875, "calibration/ece": 0.16367422836523438, "calibration/mean_confidence": 0.5006282908652343, "calibration/prompt_uniqueness": 0.855078125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 744.0, "completions/max_terminated_length": 550.0, "completions/mean_length": 168.0302734375, "completions/mean_terminated_length": 167.89690246582032, "completions/min_length": 73.4, "completions/min_terminated_length": 73.4, "epoch": 0.96, "grad_norm": 0.0010172611800953746, "learning_rate": 1e-06, "loss": 0.0002, "num_tokens": 1008245932.0, "reward": 0.9288432955741882, "reward_std": 0.07382949590682983, "rewards/accuracy_reward": 0.5306640625, "rewards/brier_reward": 0.8049243211746215, "rewards/confidence_uniqueness_reward": 0.9639307618141174, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.0031038288958370685, "rewards/frontier_coverage_0": 0.13451858162879943, "rewards/frontier_coverage_1": 0.13451858162879943, "rewards/frontier_coverage_10": 0.13342588990926743, "rewards/frontier_coverage_15": 0.12053216546773911, "rewards/frontier_coverage_20": 0.08194337785243988, "rewards/frontier_coverage_25": 0.05771302729845047, "rewards/frontier_coverage_5": 0.13427408933639526, "rewards/frontier_ece_reward": 0.0028995629400014877, "rewards/frontier_entropy_batch_reward": -0.23538158535957338, "signal/accuracy_reward/centered_abs_mean": 0.07486572265625, "signal/accuracy_reward/group_bin_occupancy": 0.16328125, "signal/accuracy_reward/group_std_mean": 0.10253596603870392, "signal/accuracy_reward/group_zero_std_frac": 0.69375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.037432861328125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.037432861328125, "signal/advantage_abs_mean": 0.056790337711572644, "signal/advantage_pre_scale_abs_mean": 0.056790337711572644, "signal/advantage_pre_scale_std": 0.09253572970628739, "signal/advantage_std": 0.09253572970628739, "signal/brier_reward/centered_abs_mean": 0.10669752955436707, "signal/brier_reward/group_bin_occupancy": 0.825, "signal/brier_reward/group_std_mean": 0.13931142389774323, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010669752955436707, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010669752955436707, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.013811485469341278, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.859375, "signal/confidence_uniqueness_reward/group_std_mean": 0.01783113442361355, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001381148537620902, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001381148537620902, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_bin_occupancy": 0.125390625, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.002649222710169852, "signal/frontier_aurc_reward/group_bin_occupancy": 0.711328125, "signal/frontier_aurc_reward/group_std_mean": 0.004259026004001498, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.311528380436357e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.311528380436357e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.1507669657468796, "signal/frontier_coverage_0/group_bin_occupancy": 0.848046875, "signal/frontier_coverage_0/group_std_mean": 0.19579726755619048, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001884587062522769, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001884587062522769, "signal/frontier_coverage_1/centered_abs_mean": 0.1507669657468796, "signal/frontier_coverage_1/group_bin_occupancy": 0.848046875, "signal/frontier_coverage_1/group_std_mean": 0.19579726755619048, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001884587062522769, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001884587062522769, "signal/frontier_coverage_10/centered_abs_mean": 0.1489147961139679, "signal/frontier_coverage_10/group_bin_occupancy": 0.848046875, "signal/frontier_coverage_10/group_std_mean": 0.19344739615917206, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018614350352436303, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018614350352436303, "signal/frontier_coverage_15/centered_abs_mean": 0.13582422733306884, "signal/frontier_coverage_15/group_bin_occupancy": 0.847265625, "signal/frontier_coverage_15/group_std_mean": 0.17663869857788086, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001697802823036909, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001697802823036909, "signal/frontier_coverage_20/centered_abs_mean": 0.08488290458917618, "signal/frontier_coverage_20/group_bin_occupancy": 0.86953125, "signal/frontier_coverage_20/group_std_mean": 0.11030421555042266, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010610363446176053, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010610363446176053, "signal/frontier_coverage_25/centered_abs_mean": 0.05705864131450653, "signal/frontier_coverage_25/group_bin_occupancy": 0.923828125, "signal/frontier_coverage_25/group_std_mean": 0.07287163138389588, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007132330210879445, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007132330210879445, "signal/frontier_coverage_5/centered_abs_mean": 0.15014611780643464, "signal/frontier_coverage_5/group_bin_occupancy": 0.84765625, "signal/frontier_coverage_5/group_std_mean": 0.1949920028448105, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00187682646792382, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00187682646792382, "signal/frontier_ece_reward/centered_abs_mean": 0.0062250176444649695, "signal/frontier_ece_reward/group_bin_occupancy": 0.844921875, "signal/frontier_ece_reward/group_std_mean": 0.008787142857909203, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006225017714314163, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006225017714314163, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2811248004436493, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.72109375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3526135325431824, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.028112480789422988, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.028112480789422988, "step": 300 }, { "epoch": 0.96, "eval_calibration/aurc": 0.4737306351081816, "eval_calibration/batch_distribution_entropy": 0.9045024028911264, "eval_calibration/batch_entropy_100bins": 0.6949329777280961, "eval_calibration/batch_entropy_10bins": 0.9045024028911264, "eval_calibration/batch_entropy_50bins": 0.7689095013084137, "eval_calibration/batch_uniqueness": 0.8984375, "eval_calibration/buffer_distribution_entropy": 0.9988096055976916, "eval_calibration/buffer_entropy_100bins": 0.9849748824561931, "eval_calibration/buffer_entropy_10bins": 0.9988096055976916, "eval_calibration/buffer_entropy_50bins": 0.9946333864318222, "eval_calibration/confidence_entropy": 0.4787274667660325, "eval_calibration/coverage@0%": 0.046875, "eval_calibration/coverage@1%": 0.046875, "eval_calibration/coverage@10%": 0.046875, "eval_calibration/coverage@15%": 0.046875, "eval_calibration/coverage@20%": 0.0546875, "eval_calibration/coverage@25%": 0.15625, "eval_calibration/coverage@30%": 0.1640625, "eval_calibration/coverage@5%": 0.046875, "eval_calibration/ece": 0.21578124999999998, "eval_calibration/mean_confidence": 0.47687499999999994, "eval_calibration/prompt_uniqueness": 0.8984375, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 422.0, "eval_completions/max_terminated_length": 422.0, "eval_completions/mean_length": 173.66567993164062, "eval_completions/mean_terminated_length": 173.66567993164062, "eval_completions/min_length": 87.0, "eval_completions/min_terminated_length": 87.0, "eval_loss": 0.0, "eval_num_tokens": 1008245932.0, "eval_reward": 0.7935565859079361, "eval_reward_std": 0.2286548987030983, "eval_rewards/accuracy_reward": 0.416015625, "eval_rewards/brier_reward": 0.799243688583374, "eval_rewards/confidence_uniqueness_reward": 0.91015625, "eval_rewards/format_reward": 1.0, "eval_rewards/frontier_aurc_reward": -0.003638996509835124, "eval_rewards/frontier_coverage_0": 0.19927702844142914, "eval_rewards/frontier_coverage_1": 0.19927702844142914, "eval_rewards/frontier_coverage_10": 0.19661162421107292, "eval_rewards/frontier_coverage_15": 0.17882990464568138, "eval_rewards/frontier_coverage_20": 0.10959831066429615, "eval_rewards/frontier_coverage_25": 0.057190462946891785, "eval_rewards/frontier_coverage_5": 0.19842347875237465, "eval_rewards/frontier_ece_reward": 0.004141724260989577, "eval_rewards/frontier_entropy_batch_reward": -1.0, "eval_runtime": 21.0746, "eval_samples_per_second": 23.725, "eval_signal/accuracy_reward/centered_abs_mean": 0.4656982421875, "eval_signal/accuracy_reward/group_bin_occupancy": 0.25, "eval_signal/accuracy_reward/group_std_mean": 0.49005643278360367, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23284912109375, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23284912109375, "eval_signal/advantage_abs_mean": 0.21324742957949638, "eval_signal/advantage_pre_scale_abs_mean": 0.21324742957949638, "eval_signal/advantage_pre_scale_std": 0.22612683847546577, "eval_signal/advantage_std": 0.22612683847546577, "eval_signal/brier_reward/centered_abs_mean": 0.18935201317071915, "eval_signal/brier_reward/group_bin_occupancy": 0.90625, "eval_signal/brier_reward/group_std_mean": 0.24315428733825684, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01893520262092352, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.01893520262092352, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0336761474609375, "eval_signal/confidence_uniqueness_reward/group_bin_occupancy": 0.3203125, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.038827759213745594, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003367614757735282, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003367614757735282, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_bin_occupancy": 0.125, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.004632304655387998, "eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.703125, "eval_signal/frontier_aurc_reward/group_std_mean": 0.008442466845735908, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.790380964754149e-05, "eval_signal/frontier_aurc_reward/weight": 0.012500000186264515, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.790380964754149e-05, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.3491540476679802, "eval_signal/frontier_coverage_0/group_bin_occupancy": 0.96875, "eval_signal/frontier_coverage_0/group_std_mean": 0.42496294528245926, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.004364425898529589, "eval_signal/frontier_coverage_0/weight": 0.012500000186264515, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004364425898529589, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.3491540476679802, "eval_signal/frontier_coverage_1/group_bin_occupancy": 0.96875, "eval_signal/frontier_coverage_1/group_std_mean": 0.42496294528245926, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004364425898529589, "eval_signal/frontier_coverage_1/weight": 0.012500000186264515, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004364425898529589, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.3441574051976204, "eval_signal/frontier_coverage_10/group_bin_occupancy": 0.96875, "eval_signal/frontier_coverage_10/group_std_mean": 0.4191160574555397, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0043019677978008986, "eval_signal/frontier_coverage_10/weight": 0.012500000186264515, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0043019677978008986, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.31247151643037796, "eval_signal/frontier_coverage_15/group_bin_occupancy": 0.9609375, "eval_signal/frontier_coverage_15/group_std_mean": 0.38298317044973373, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003905894060153514, "eval_signal/frontier_coverage_15/weight": 0.012500000186264515, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003905894060153514, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.17517539486289024, "eval_signal/frontier_coverage_20/group_bin_occupancy": 0.9140625, "eval_signal/frontier_coverage_20/group_std_mean": 0.22201964259147644, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0021896924590691924, "eval_signal/frontier_coverage_20/weight": 0.012500000186264515, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0021896924590691924, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.09041432663798332, "eval_signal/frontier_coverage_25/group_bin_occupancy": 0.953125, "eval_signal/frontier_coverage_25/group_std_mean": 0.11431009136140347, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011301790946163237, "eval_signal/frontier_coverage_25/weight": 0.012500000186264515, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011301790946163237, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.34793277829885483, "eval_signal/frontier_coverage_5/group_bin_occupancy": 0.96875, "eval_signal/frontier_coverage_5/group_std_mean": 0.42353837192058563, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004349160008132458, "eval_signal/frontier_coverage_5/weight": 0.012500000186264515, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004349160008132458, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.007637793896719813, "eval_signal/frontier_ece_reward/group_bin_occupancy": 0.984375, "eval_signal/frontier_ece_reward/group_std_mean": 0.009760213550180197, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007637794187758118, "eval_signal/frontier_ece_reward/weight": 0.10000000149011612, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007637794187758118, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.125, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0, "eval_steps_per_second": 0.19, "step": 300 }, { "calibration/aurc": 0.25243171988357693, "calibration/batch_distribution_entropy": 0.9739037924606638, "calibration/batch_entropy_100bins": 0.9439049482785213, "calibration/batch_entropy_10bins": 0.9739037924606638, "calibration/batch_entropy_50bins": 0.9694166202887426, "calibration/batch_uniqueness": 0.966259765625, "calibration/buffer_distribution_entropy": 0.99871030928833, "calibration/buffer_entropy_100bins": 0.9844605941497815, "calibration/buffer_entropy_10bins": 0.99871030928833, "calibration/buffer_entropy_50bins": 0.9945275683891899, "calibration/confidence_entropy": 0.5045930767500602, "calibration/coverage@0%": 0.035546875, "calibration/coverage@1%": 0.035546875, "calibration/coverage@10%": 0.274609375, "calibration/coverage@15%": 0.432421875, "calibration/coverage@20%": 0.501953125, "calibration/coverage@25%": 0.563671875, "calibration/coverage@30%": 0.607421875, "calibration/coverage@5%": 0.09296875, "calibration/ece": 0.11844418789062501, "calibration/mean_confidence": 0.522810125390625, "calibration/prompt_uniqueness": 0.87255859375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00048828125, "completions/max_length": 1185.4, "completions/max_terminated_length": 604.8, "completions/mean_length": 172.418359375, "completions/mean_terminated_length": 171.7501678466797, "completions/min_length": 79.6, "completions/min_terminated_length": 79.6, "epoch": 0.976, "grad_norm": 0.001320485258474946, "learning_rate": 1e-06, "loss": 0.0019, "num_tokens": 1024872616.0, "reward": 0.9389370799064636, "reward_std": 0.08163964003324509, "rewards/accuracy_reward": 0.5552734375, "rewards/brier_reward": 0.8005570650100708, "rewards/confidence_uniqueness_reward": 0.9648543715476989, "rewards/format_reward": 0.99951171875, "rewards/frontier_aurc_reward": -0.00284066004678607, "rewards/frontier_coverage_0": 0.10627357796765864, "rewards/frontier_coverage_1": 0.10627357796765864, "rewards/frontier_coverage_10": 0.10575458101229743, "rewards/frontier_coverage_15": 0.09721773080527782, "rewards/frontier_coverage_20": 0.07075041458010674, "rewards/frontier_coverage_25": 0.051219668984413144, "rewards/frontier_coverage_5": 0.10608085230924189, "rewards/frontier_ece_reward": 0.0025254017557017503, "rewards/frontier_entropy_batch_reward": -0.23258313536643982, "signal/accuracy_reward/centered_abs_mean": 0.08868408203125, "signal/accuracy_reward/group_bin_occupancy": 0.16640625, "signal/accuracy_reward/group_std_mean": 0.11722440421581268, "signal/accuracy_reward/group_zero_std_frac": 0.66875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.044342041015625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.044342041015625, "signal/advantage_abs_mean": 0.06320648193359375, "signal/advantage_pre_scale_abs_mean": 0.06320648193359375, "signal/advantage_pre_scale_std": 0.10139600187540054, "signal/advantage_std": 0.10139600187540054, "signal/brier_reward/centered_abs_mean": 0.1057712584733963, "signal/brier_reward/group_bin_occupancy": 0.85, "signal/brier_reward/group_std_mean": 0.13689170479774476, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0105771254748106, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.0105771254748106, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.013170672208070755, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.855078125, "signal/confidence_uniqueness_reward/group_std_mean": 0.01751931421458721, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001317067281343043, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001317067281343043, "signal/format_reward/centered_abs_mean": 0.000909423828125, "signal/format_reward/group_bin_occupancy": 0.126171875, "signal/format_reward/group_std_mean": 0.002030306123197079, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004547119140625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0004547119140625, "signal/frontier_aurc_reward/centered_abs_mean": 0.002454556990414858, "signal/frontier_aurc_reward/group_bin_occupancy": 0.70625, "signal/frontier_aurc_reward/group_std_mean": 0.004182360181584954, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.068196165258996e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.068196165258996e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.151848965883255, "signal/frontier_coverage_0/group_bin_occupancy": 0.868359375, "signal/frontier_coverage_0/group_std_mean": 0.19617189466953278, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001898112171329558, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001898112171329558, "signal/frontier_coverage_1/centered_abs_mean": 0.151848965883255, "signal/frontier_coverage_1/group_bin_occupancy": 0.868359375, "signal/frontier_coverage_1/group_std_mean": 0.19617189466953278, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001898112171329558, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001898112171329558, "signal/frontier_coverage_10/centered_abs_mean": 0.1494060769677162, "signal/frontier_coverage_10/group_bin_occupancy": 0.866796875, "signal/frontier_coverage_10/group_std_mean": 0.1930826336145401, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018675760366022587, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018675760366022587, "signal/frontier_coverage_15/centered_abs_mean": 0.1364602714776993, "signal/frontier_coverage_15/group_bin_occupancy": 0.860546875, "signal/frontier_coverage_15/group_std_mean": 0.1767397940158844, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0017057533143088222, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0017057533143088222, "signal/frontier_coverage_20/centered_abs_mean": 0.08021349385380745, "signal/frontier_coverage_20/group_bin_occupancy": 0.875390625, "signal/frontier_coverage_20/group_std_mean": 0.10482732057571412, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010026687057688832, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010026687057688832, "signal/frontier_coverage_25/centered_abs_mean": 0.0531280666589737, "signal/frontier_coverage_25/group_bin_occupancy": 0.922265625, "signal/frontier_coverage_25/group_std_mean": 0.06855799853801728, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006641008774749934, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006641008774749934, "signal/frontier_coverage_5/centered_abs_mean": 0.1511296510696411, "signal/frontier_coverage_5/group_bin_occupancy": 0.86640625, "signal/frontier_coverage_5/group_std_mean": 0.19528249204158782, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018891207640990616, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018891207640990616, "signal/frontier_ece_reward/centered_abs_mean": 0.006002122722566128, "signal/frontier_ece_reward/group_bin_occupancy": 0.861328125, "signal/frontier_ece_reward/group_std_mean": 0.008093012310564519, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006002122885547578, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006002122885547578, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.28755232095718386, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.72265625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3637108564376831, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02875523306429386, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02875523306429386, "step": 305 }, { "calibration/aurc": 0.36275381546387697, "calibration/batch_distribution_entropy": 0.9818363303055356, "calibration/batch_entropy_100bins": 0.9516448277738757, "calibration/batch_entropy_10bins": 0.9818363303055356, "calibration/batch_entropy_50bins": 0.9751574070879462, "calibration/batch_uniqueness": 0.965179443359375, "calibration/buffer_distribution_entropy": 0.9985763272881746, "calibration/buffer_entropy_100bins": 0.9834172411685647, "calibration/buffer_entropy_10bins": 0.9985763272881746, "calibration/buffer_entropy_50bins": 0.9944091208256998, "calibration/confidence_entropy": 0.5024404900454493, "calibration/coverage@0%": 0.00546875, "calibration/coverage@1%": 0.00546875, "calibration/coverage@10%": 0.019921875, "calibration/coverage@15%": 0.05546875, "calibration/coverage@20%": 0.108203125, "calibration/coverage@25%": 0.294921875, "calibration/coverage@30%": 0.43984375, "calibration/coverage@5%": 0.00546875, "calibration/ece": 0.11440790949765625, "calibration/mean_confidence": 0.4765774462054687, "calibration/prompt_uniqueness": 0.871875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 780.4, "completions/max_terminated_length": 620.6, "completions/mean_length": 166.96044921875, "completions/mean_terminated_length": 166.827734375, "completions/min_length": 77.0, "completions/min_terminated_length": 77.0, "epoch": 0.992, "grad_norm": 0.0008437388460151851, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 1041710771.0, "reward": 0.9289904713630677, "reward_std": 0.07655752152204513, "rewards/accuracy_reward": 0.5341796875, "rewards/brier_reward": 0.8037751078605652, "rewards/confidence_uniqueness_reward": 0.9644991874694824, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.002967359917238355, "rewards/frontier_coverage_0": 0.12212227135896683, "rewards/frontier_coverage_1": 0.12212227135896683, "rewards/frontier_coverage_10": 0.12049156278371811, "rewards/frontier_coverage_15": 0.11204217970371247, "rewards/frontier_coverage_20": 0.07388233989477158, "rewards/frontier_coverage_25": 0.055122246593236925, "rewards/frontier_coverage_5": 0.1214935302734375, "rewards/frontier_ece_reward": 0.0031089282827451827, "rewards/frontier_entropy_batch_reward": -0.2424273669719696, "signal/accuracy_reward/centered_abs_mean": 0.0775146484375, "signal/accuracy_reward/group_bin_occupancy": 0.1625, "signal/accuracy_reward/group_std_mean": 0.1029381737112999, "signal/accuracy_reward/group_zero_std_frac": 0.7, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.03875732421875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.03875732421875, "signal/advantage_abs_mean": 0.05987899079918861, "signal/advantage_pre_scale_abs_mean": 0.05987899079918861, "signal/advantage_pre_scale_std": 0.09616845995187759, "signal/advantage_std": 0.09616845995187759, "signal/brier_reward/centered_abs_mean": 0.10155004113912583, "signal/brier_reward/group_bin_occupancy": 0.844140625, "signal/brier_reward/group_std_mean": 0.1313195899128914, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010155004076659679, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010155004076659679, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012845552526414394, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.878515625, "signal/confidence_uniqueness_reward/group_std_mean": 0.016458464972674846, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012845552759245039, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012845552759245039, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_bin_occupancy": 0.125390625, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.002551899803802371, "signal/frontier_aurc_reward/group_bin_occupancy": 0.7140625, "signal/frontier_aurc_reward/group_std_mean": 0.004267166648060083, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.1898749148240314e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.1898749148240314e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.14079180657863616, "signal/frontier_coverage_0/group_bin_occupancy": 0.865234375, "signal/frontier_coverage_0/group_std_mean": 0.18092852234840393, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001759897661395371, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001759897661395371, "signal/frontier_coverage_1/centered_abs_mean": 0.14079180657863616, "signal/frontier_coverage_1/group_bin_occupancy": 0.865234375, "signal/frontier_coverage_1/group_std_mean": 0.18092852234840393, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001759897661395371, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001759897661395371, "signal/frontier_coverage_10/centered_abs_mean": 0.13867213428020478, "signal/frontier_coverage_10/group_bin_occupancy": 0.86328125, "signal/frontier_coverage_10/group_std_mean": 0.17820720970630646, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0017334016738459468, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017334016738459468, "signal/frontier_coverage_15/centered_abs_mean": 0.12698494046926498, "signal/frontier_coverage_15/group_bin_occupancy": 0.85546875, "signal/frontier_coverage_15/group_std_mean": 0.1636158138513565, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0015873117838054896, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0015873117838054896, "signal/frontier_coverage_20/centered_abs_mean": 0.07213507741689681, "signal/frontier_coverage_20/group_bin_occupancy": 0.887890625, "signal/frontier_coverage_20/group_std_mean": 0.09348605275154113, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009016884723678231, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009016884723678231, "signal/frontier_coverage_25/centered_abs_mean": 0.05218314677476883, "signal/frontier_coverage_25/group_bin_occupancy": 0.930078125, "signal/frontier_coverage_25/group_std_mean": 0.0668656125664711, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006522893439978361, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006522893439978361, "signal/frontier_coverage_5/centered_abs_mean": 0.13967403918504714, "signal/frontier_coverage_5/group_bin_occupancy": 0.86328125, "signal/frontier_coverage_5/group_std_mean": 0.17946992814540863, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0017459255410358309, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0017459255410358309, "signal/frontier_ece_reward/centered_abs_mean": 0.007050628308206797, "signal/frontier_ece_reward/group_bin_occupancy": 0.812109375, "signal/frontier_ece_reward/group_std_mean": 0.011934582144021988, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007050628308206796, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007050628308206796, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2894311249256134, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.711328125, "signal/frontier_entropy_batch_reward/group_std_mean": 0.36359102725982667, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.028943114355206488, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.028943114355206488, "step": 310 }, { "calibration/aurc": 0.28309316461584144, "calibration/batch_distribution_entropy": 0.9562440520005147, "calibration/batch_entropy_100bins": 0.9321305829954412, "calibration/batch_entropy_10bins": 0.9562440520005147, "calibration/batch_entropy_50bins": 0.9521604451830256, "calibration/batch_uniqueness": 0.9661712646484375, "calibration/buffer_distribution_entropy": 0.9986985889836495, "calibration/buffer_entropy_100bins": 0.9827654244956366, "calibration/buffer_entropy_10bins": 0.9986985889836495, "calibration/buffer_entropy_50bins": 0.994458498244382, "calibration/confidence_entropy": 0.4959383976784825, "calibration/coverage@0%": 0.013671875, "calibration/coverage@1%": 0.013671875, "calibration/coverage@10%": 0.07421875, "calibration/coverage@15%": 0.1591796875, "calibration/coverage@20%": 0.3056640625, "calibration/coverage@25%": 0.3720703125, "calibration/coverage@30%": 0.59765625, "calibration/coverage@5%": 0.013671875, "calibration/ece": 0.14605446661523436, "calibration/mean_confidence": 0.6033933338027344, "calibration/prompt_uniqueness": 0.8629150390625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 391.0, "completions/max_terminated_length": 391.0, "completions/mean_length": 162.71500396728516, "completions/mean_terminated_length": 162.71500396728516, "completions/min_length": 73.5, "completions/min_terminated_length": 73.5, "epoch": 0.9984, "num_tokens": 1048384942.0, "reward": 0.9344164729118347, "reward_std": 0.08154623582959175, "rewards/accuracy_reward": 0.558837890625, "rewards/brier_reward": 0.7741440236568451, "rewards/confidence_uniqueness_reward": 0.9663105010986328, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.0030248835682868958, "rewards/frontier_coverage_0": 0.06300802156329155, "rewards/frontier_coverage_1": 0.06300802156329155, "rewards/frontier_coverage_10": 0.062442582100629807, "rewards/frontier_coverage_15": 0.055601296946406364, "rewards/frontier_coverage_20": 0.03809378854930401, "rewards/frontier_coverage_25": 0.03736502677202225, "rewards/frontier_coverage_5": 0.06258464232087135, "rewards/frontier_ece_reward": 0.0019081256468780339, "rewards/frontier_entropy_batch_reward": -0.23977214097976685, "signal/accuracy_reward/centered_abs_mean": 0.0806732177734375, "signal/accuracy_reward/group_bin_occupancy": 0.1630859375, "signal/accuracy_reward/group_std_mean": 0.1065446101129055, "signal/accuracy_reward/group_zero_std_frac": 0.6953125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04033660888671875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04033660888671875, "signal/advantage_abs_mean": 0.06426878273487091, "signal/advantage_pre_scale_abs_mean": 0.06426878273487091, "signal/advantage_pre_scale_std": 0.10264430195093155, "signal/advantage_std": 0.10264430195093155, "signal/brier_reward/centered_abs_mean": 0.11038177087903023, "signal/brier_reward/group_bin_occupancy": 0.8701171875, "signal/brier_reward/group_std_mean": 0.14014140516519547, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011038177646696568, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011038177646696568, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.011780858039855957, "signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8720703125, "signal/confidence_uniqueness_reward/group_std_mean": 0.015235808677971363, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0011780858621932566, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0011780858621932566, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_bin_occupancy": 0.125, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.002542344154790044, "signal/frontier_aurc_reward/group_bin_occupancy": 0.7236328125, "signal/frontier_aurc_reward/group_std_mean": 0.0040895091369748116, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.1779301025380846e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.1779301025380846e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.13523942232131958, "signal/frontier_coverage_0/group_bin_occupancy": 0.8798828125, "signal/frontier_coverage_0/group_std_mean": 0.17214351892471313, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0016904928488656878, "signal/frontier_coverage_0/weight": 0.012500000186264515, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0016904928488656878, "signal/frontier_coverage_1/centered_abs_mean": 0.13523942232131958, "signal/frontier_coverage_1/group_bin_occupancy": 0.8798828125, "signal/frontier_coverage_1/group_std_mean": 0.17214351892471313, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0016904928488656878, "signal/frontier_coverage_1/weight": 0.012500000186264515, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0016904928488656878, "signal/frontier_coverage_10/centered_abs_mean": 0.13328810781240463, "signal/frontier_coverage_10/group_bin_occupancy": 0.8740234375, "signal/frontier_coverage_10/group_std_mean": 0.16966666281223297, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0016661013942211866, "signal/frontier_coverage_10/weight": 0.012500000186264515, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0016661013942211866, "signal/frontier_coverage_15/centered_abs_mean": 0.1213008388876915, "signal/frontier_coverage_15/group_bin_occupancy": 0.8662109375, "signal/frontier_coverage_15/group_std_mean": 0.15479815006256104, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001516260497737676, "signal/frontier_coverage_15/weight": 0.012500000186264515, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001516260497737676, "signal/frontier_coverage_20/centered_abs_mean": 0.065843116492033, "signal/frontier_coverage_20/group_bin_occupancy": 0.8955078125, "signal/frontier_coverage_20/group_std_mean": 0.08446861431002617, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008230389503296465, "signal/frontier_coverage_20/weight": 0.012500000186264515, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008230389503296465, "signal/frontier_coverage_25/centered_abs_mean": 0.04858388379216194, "signal/frontier_coverage_25/group_bin_occupancy": 0.91015625, "signal/frontier_coverage_25/group_std_mean": 0.06284799799323082, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006072985415812582, "signal/frontier_coverage_25/weight": 0.012500000186264515, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006072985415812582, "signal/frontier_coverage_5/centered_abs_mean": 0.1348385065793991, "signal/frontier_coverage_5/group_bin_occupancy": 0.876953125, "signal/frontier_coverage_5/group_std_mean": 0.17164986580610275, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001685481343884021, "signal/frontier_coverage_5/weight": 0.012500000186264515, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001685481343884021, "signal/frontier_ece_reward/centered_abs_mean": 0.006206750171259046, "signal/frontier_ece_reward/group_bin_occupancy": 0.8408203125, "signal/frontier_ece_reward/group_std_mean": 0.009092409629374743, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006206750113051385, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006206750113051385, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.30052025616168976, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7265625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.37405627965927124, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03005202580243349, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03005202580243349, "step": 312, "total_flos": 0.0, "train_loss": 0.004423426932846315, "train_runtime": 60056.3002, "train_samples_per_second": 0.333, "train_steps_per_second": 0.005 } ], "logging_steps": 5, "max_steps": 312, "num_input_tokens_seen": 1048384942, "num_train_epochs": 1, "save_steps": 60, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }