{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9984, "eval_steps": 50, "global_step": 312, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "calibration/aurc": 0.6429490567891648, "calibration/batch_distribution_entropy": 0.6568061886012901, "calibration/confidence_entropy": 0.346240177384674, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.4945882363067394, "calibration/mean_confidence": 0.7883452419087591, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.04150390625, "completions/max_length": 1505.0, "completions/max_terminated_length": 1505.0, "completions/mean_length": 212.3419921875, "completions/mean_terminated_length": 221.52340393066407, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.016, "grad_norm": 0.06983423233032227, "learning_rate": 3.1249999999999997e-07, "loss": 0.0083, "num_tokens": 17018414.0, "reward": 0.7155525326728821, "reward_std": 0.601375138759613, "rewards/accuracy_reward": 0.2197265625, "rewards/brier_reward": 0.37495601177215576, "rewards/confidence_uniqueness_reward": 0.4819000899791718, "rewards/format_reward": 0.67919921875, "rewards/frontier_aurc_reward": 0.3020222902297974, "rewards/frontier_ece_reward": 0.3020222902297974, "rewards/frontier_entropy_batch_reward": -0.6498908281326294, "rewards/volume_coverage_0": 0.3020222902297974, "rewards/volume_coverage_1": 0.3020222902297974, "rewards/volume_coverage_10": 0.3020222902297974, "rewards/volume_coverage_15": 0.3020222902297974, "rewards/volume_coverage_20": 0.3020222902297974, "rewards/volume_coverage_25": 0.3020222902297974, "rewards/volume_coverage_5": 0.3020222902297974, "signal/accuracy_reward/centered_abs_mean": 0.2397705078125, "signal/accuracy_reward/group_std_mean": 0.2794930338859558, "signal/accuracy_reward/group_zero_std_frac": 0.33125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.11988525390625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.11988525390625, "signal/advantage_abs_mean": 0.5115305304527282, "signal/advantage_pre_scale_abs_mean": 0.5115305304527282, "signal/advantage_pre_scale_std": 0.6212727189064026, "signal/advantage_std": 0.6212727189064026, "signal/brier_reward/centered_abs_mean": 0.31964564323425293, "signal/brier_reward/group_std_mean": 0.36430342197418214, "signal/brier_reward/group_zero_std_frac": 0.003125, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03196456506848335, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.03196456506848335, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.298951119184494, "signal/confidence_uniqueness_reward/group_std_mean": 0.3491382360458374, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02989511229097843, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.02989511229097843, "signal/format_reward/centered_abs_mean": 0.407049560546875, "signal/format_reward/group_std_mean": 0.45584299564361574, "signal/format_reward/group_zero_std_frac": 0.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.2035247802734375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.2035247802734375, "signal/frontier_aurc_reward/centered_abs_mean": 0.2922212302684784, "signal/frontier_aurc_reward/group_std_mean": 0.3429143726825714, "signal/frontier_aurc_reward/group_zero_std_frac": 0.003125, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0036527654621750115, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0036527654621750115, "signal/frontier_ece_reward/centered_abs_mean": 0.2922212302684784, "signal/frontier_ece_reward/group_std_mean": 0.3429143726825714, "signal/frontier_ece_reward/group_zero_std_frac": 0.003125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.02922212369740009, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.02922212369740009, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.4286865532398224, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4737535834312439, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.042868655920028684, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.042868655920028684, "signal/volume_coverage_0/centered_abs_mean": 0.2922212302684784, "signal/volume_coverage_0/group_std_mean": 0.3429143726825714, "signal/volume_coverage_0/group_zero_std_frac": 0.003125, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 0.02922212369740009, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 0.02922212369740009, "signal/volume_coverage_1/centered_abs_mean": 0.2922212302684784, "signal/volume_coverage_1/group_std_mean": 0.3429143726825714, "signal/volume_coverage_1/group_zero_std_frac": 0.003125, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 0.02922212369740009, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 0.02922212369740009, "signal/volume_coverage_10/centered_abs_mean": 0.2922212302684784, "signal/volume_coverage_10/group_std_mean": 0.3429143726825714, "signal/volume_coverage_10/group_zero_std_frac": 0.003125, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 0.02922212369740009, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 0.02922212369740009, "signal/volume_coverage_15/centered_abs_mean": 0.2922212302684784, "signal/volume_coverage_15/group_std_mean": 0.3429143726825714, "signal/volume_coverage_15/group_zero_std_frac": 0.003125, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 0.02922212369740009, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 0.02922212369740009, "signal/volume_coverage_20/centered_abs_mean": 0.2922212302684784, "signal/volume_coverage_20/group_std_mean": 0.3429143726825714, "signal/volume_coverage_20/group_zero_std_frac": 0.003125, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 0.02922212369740009, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 0.02922212369740009, "signal/volume_coverage_25/centered_abs_mean": 0.2922212302684784, "signal/volume_coverage_25/group_std_mean": 0.3429143726825714, "signal/volume_coverage_25/group_zero_std_frac": 0.003125, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.02922212369740009, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 0.02922212369740009, "signal/volume_coverage_5/centered_abs_mean": 0.2922212302684784, "signal/volume_coverage_5/group_std_mean": 0.3429143726825714, "signal/volume_coverage_5/group_zero_std_frac": 0.003125, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 0.02922212369740009, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 0.02922212369740009, "step": 5 }, { "calibration/aurc": 0.6774772714272642, "calibration/batch_distribution_entropy": 0.6704348051300373, "calibration/confidence_entropy": 0.3531415986791937, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.5158973434001635, "calibration/mean_confidence": 0.7822890515633215, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0384765625, "completions/max_length": 1481.2, "completions/max_terminated_length": 1481.2, "completions/mean_length": 205.05126953125, "completions/mean_terminated_length": 213.29146423339844, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.032, "grad_norm": 0.01851228065788746, "learning_rate": 6.249999999999999e-07, "loss": 0.0124, "num_tokens": 34218491.0, "reward": 0.7374850273132324, "reward_std": 0.5725616097450257, "rewards/accuracy_reward": 0.21396484375, "rewards/brier_reward": 0.3848159670829773, "rewards/confidence_uniqueness_reward": 0.5191182971000672, "rewards/format_reward": 0.72490234375, "rewards/frontier_aurc_reward": 0.3033848226070404, "rewards/frontier_ece_reward": 0.3033848226070404, "rewards/frontier_entropy_batch_reward": -0.6884217381477356, "rewards/volume_coverage_0": 0.3033848226070404, "rewards/volume_coverage_1": 0.3033848226070404, "rewards/volume_coverage_10": 0.3033848226070404, "rewards/volume_coverage_15": 0.3033848226070404, "rewards/volume_coverage_20": 0.3033848226070404, "rewards/volume_coverage_25": 0.3033848226070404, "rewards/volume_coverage_5": 0.3033848226070404, "signal/accuracy_reward/centered_abs_mean": 0.222235107421875, "signal/accuracy_reward/group_std_mean": 0.26479236483573915, "signal/accuracy_reward/group_zero_std_frac": 0.346875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1111175537109375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.1111175537109375, "signal/advantage_abs_mean": 0.47654428482055666, "signal/advantage_pre_scale_abs_mean": 0.47654428482055666, "signal/advantage_pre_scale_std": 0.5928633451461792, "signal/advantage_std": 0.5928633451461792, "signal/brier_reward/centered_abs_mean": 0.30740639567375183, "signal/brier_reward/group_std_mean": 0.35535589456558225, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.030740641802549363, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.030740641802549363, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.2774076223373413, "signal/confidence_uniqueness_reward/group_std_mean": 0.3378679931163788, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.027740763500332832, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.027740763500332832, "signal/format_reward/centered_abs_mean": 0.369842529296875, "signal/format_reward/group_std_mean": 0.43356016278266907, "signal/format_reward/group_zero_std_frac": 0.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.1849212646484375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.1849212646484375, "signal/frontier_aurc_reward/centered_abs_mean": 0.28120753169059753, "signal/frontier_aurc_reward/group_std_mean": 0.3347383916378021, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.003515094378963113, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.003515094378963113, "signal/frontier_ece_reward/centered_abs_mean": 0.28120753169059753, "signal/frontier_ece_reward/group_std_mean": 0.3347383916378021, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.028120755031704903, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.028120755031704903, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3998861491680145, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4569081485271454, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03998861610889435, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03998861610889435, "signal/volume_coverage_0/centered_abs_mean": 0.28120753169059753, "signal/volume_coverage_0/group_std_mean": 0.3347383916378021, "signal/volume_coverage_0/group_zero_std_frac": 0.0, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 0.028120755031704903, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 0.028120755031704903, "signal/volume_coverage_1/centered_abs_mean": 0.28120753169059753, "signal/volume_coverage_1/group_std_mean": 0.3347383916378021, "signal/volume_coverage_1/group_zero_std_frac": 0.0, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 0.028120755031704903, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 0.028120755031704903, "signal/volume_coverage_10/centered_abs_mean": 0.28120753169059753, "signal/volume_coverage_10/group_std_mean": 0.3347383916378021, "signal/volume_coverage_10/group_zero_std_frac": 0.0, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 0.028120755031704903, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 0.028120755031704903, "signal/volume_coverage_15/centered_abs_mean": 0.28120753169059753, "signal/volume_coverage_15/group_std_mean": 0.3347383916378021, "signal/volume_coverage_15/group_zero_std_frac": 0.0, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 0.028120755031704903, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 0.028120755031704903, "signal/volume_coverage_20/centered_abs_mean": 0.28120753169059753, "signal/volume_coverage_20/group_std_mean": 0.3347383916378021, "signal/volume_coverage_20/group_zero_std_frac": 0.0, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 0.028120755031704903, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 0.028120755031704903, "signal/volume_coverage_25/centered_abs_mean": 0.28120753169059753, "signal/volume_coverage_25/group_std_mean": 0.3347383916378021, "signal/volume_coverage_25/group_zero_std_frac": 0.0, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.028120755031704903, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 0.028120755031704903, "signal/volume_coverage_5/centered_abs_mean": 0.28120753169059753, "signal/volume_coverage_5/group_std_mean": 0.3347383916378021, "signal/volume_coverage_5/group_zero_std_frac": 0.0, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 0.028120755031704903, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 0.028120755031704903, "step": 10 }, { "calibration/aurc": 0.5685390614142158, "calibration/batch_distribution_entropy": 0.6455145518487925, "calibration/buffer_distribution_entropy": 0.670253051700831, "calibration/confidence_entropy": 0.34757602436925944, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.45315780046924337, "calibration/mean_confidence": 0.8027396171602854, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0193359375, "completions/max_length": 1439.4, "completions/max_terminated_length": 1439.4, "completions/mean_length": 172.82841796875, "completions/mean_terminated_length": 176.34844970703125, "completions/min_length": 0.0, "completions/min_terminated_length": 11.4, "epoch": 0.048, "grad_norm": 0.022836102172732353, "learning_rate": 9.374999999999999e-07, "loss": 0.0084, "num_tokens": 51036990.0, "reward": 0.865392017364502, "reward_std": 0.44560941457748415, "rewards/accuracy_reward": 0.28134765625, "rewards/brier_reward": 0.49504044055938723, "rewards/confidence_uniqueness_reward": 0.6459718704223633, "rewards/format_reward": 0.8869140625, "rewards/frontier_aurc_reward": 0.3092373930849135, "rewards/frontier_ece_reward": 0.2995997928082943, "rewards/frontier_entropy_batch_reward": -0.8416913747787476, "rewards/volume_coverage_0": 0.3107194304991089, "rewards/volume_coverage_1": 0.3107194304991089, "rewards/volume_coverage_10": 0.3107194304991089, "rewards/volume_coverage_15": 0.3107194304991089, "rewards/volume_coverage_20": 0.3107194304991089, "rewards/volume_coverage_25": 0.3107194331706008, "rewards/volume_coverage_5": 0.3107194304991089, "signal/accuracy_reward/centered_abs_mean": 0.197674560546875, "signal/accuracy_reward/group_std_mean": 0.2459003061056137, "signal/accuracy_reward/group_zero_std_frac": 0.35625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0988372802734375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0988372802734375, "signal/advantage_abs_mean": 0.3519722521305084, "signal/advantage_pre_scale_abs_mean": 0.3519722521305084, "signal/advantage_pre_scale_std": 0.4639441788196564, "signal/advantage_std": 0.4639441788196564, "signal/brier_reward/centered_abs_mean": 0.2721615880727768, "signal/brier_reward/group_std_mean": 0.3270188093185425, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.027216159179806708, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.027216159179806708, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.19719513058662413, "signal/confidence_uniqueness_reward/group_std_mean": 0.2604415327310562, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.01971951425075531, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.01971951425075531, "signal/format_reward/centered_abs_mean": 0.18668212890625, "signal/format_reward/group_std_mean": 0.2880357503890991, "signal/format_reward/group_zero_std_frac": 0.071875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.093341064453125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.093341064453125, "signal/frontier_aurc_reward/centered_abs_mean": 0.21899721091613172, "signal/frontier_aurc_reward/group_std_mean": 0.26340931504964826, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0027374652767321096, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0027374652767321096, "signal/frontier_ece_reward/centered_abs_mean": 0.24538690745830535, "signal/frontier_ece_reward/group_std_mean": 0.2943758606910706, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.024538691714406015, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.024538691714406015, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2529719710350037, "signal/frontier_entropy_batch_reward/group_std_mean": 0.35891305804252627, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.021875, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.025297198072075845, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.025297198072075845, "signal/volume_coverage_0/centered_abs_mean": 0.21790143859763106, "signal/volume_coverage_0/group_std_mean": 0.261889320824187, "signal/volume_coverage_0/group_zero_std_frac": 0.2, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 0.021790144977350195, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 0.021790144977350195, "signal/volume_coverage_1/centered_abs_mean": 0.21790143859763106, "signal/volume_coverage_1/group_std_mean": 0.261889320824187, "signal/volume_coverage_1/group_zero_std_frac": 0.2, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 0.021790144977350195, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 0.021790144977350195, "signal/volume_coverage_10/centered_abs_mean": 0.21790143859763106, "signal/volume_coverage_10/group_std_mean": 0.261889320824187, "signal/volume_coverage_10/group_zero_std_frac": 0.2, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 0.021790144977350195, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 0.021790144977350195, "signal/volume_coverage_15/centered_abs_mean": 0.21790143859763106, "signal/volume_coverage_15/group_std_mean": 0.261889320824187, "signal/volume_coverage_15/group_zero_std_frac": 0.2, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 0.021790144977350195, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 0.021790144977350195, "signal/volume_coverage_20/centered_abs_mean": 0.21790143859763106, "signal/volume_coverage_20/group_std_mean": 0.261889320824187, "signal/volume_coverage_20/group_zero_std_frac": 0.2, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 0.021790144977350195, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 0.021790144977350195, "signal/volume_coverage_25/centered_abs_mean": 0.21790144193124733, "signal/volume_coverage_25/group_std_mean": 0.26188932565360795, "signal/volume_coverage_25/group_zero_std_frac": 0.15, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.02179014531071184, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 0.02179014531071184, "signal/volume_coverage_5/centered_abs_mean": 0.21790143859763106, "signal/volume_coverage_5/group_std_mean": 0.261889320824187, "signal/volume_coverage_5/group_zero_std_frac": 0.2, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 0.021790144977350195, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 0.021790144977350195, "step": 15 }, { "calibration/aurc": 0.5202567853912221, "calibration/batch_distribution_entropy": 0.7380507805905504, "calibration/buffer_distribution_entropy": 0.6726920946793268, "calibration/confidence_entropy": 0.3816252415285025, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.351275651830115, "calibration/mean_confidence": 0.7499806626748107, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00556640625, "completions/max_length": 1163.2, "completions/max_terminated_length": 1163.2, "completions/mean_length": 130.587109375, "completions/mean_terminated_length": 131.34015197753905, "completions/min_length": 0.0, "completions/min_terminated_length": 22.4, "epoch": 0.064, "grad_norm": 0.004004279151558876, "learning_rate": 1e-06, "loss": -0.0028, "num_tokens": 67292602.0, "reward": 0.7010321021080017, "reward_std": 0.19609815776348113, "rewards/accuracy_reward": 0.344140625, "rewards/brier_reward": 0.5915492057800293, "rewards/confidence_uniqueness_reward": 0.7575255513191224, "rewards/format_reward": 0.9775390625, "rewards/frontier_aurc_reward": -0.00692891301587224, "rewards/frontier_ece_reward": -0.042981109907850625, "rewards/frontier_entropy_batch_reward": -0.9033052682876587, "rewards/volume_coverage_0": 2.1003434658162236e-09, "rewards/volume_coverage_1": 2.1003434658162236e-09, "rewards/volume_coverage_10": 2.1003434658162236e-09, "rewards/volume_coverage_15": 4.33701768831618e-08, "rewards/volume_coverage_20": 5.88921527877595e-08, "rewards/volume_coverage_25": 1.2825582353936938e-07, "rewards/volume_coverage_5": 2.1003434658162236e-09, "signal/accuracy_reward/centered_abs_mean": 0.19951171875, "signal/accuracy_reward/group_std_mean": 0.2516826242208481, "signal/accuracy_reward/group_zero_std_frac": 0.340625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.099755859375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.099755859375, "signal/advantage_abs_mean": 0.1468222200870514, "signal/advantage_pre_scale_abs_mean": 0.1468222200870514, "signal/advantage_pre_scale_std": 0.21374104022979737, "signal/advantage_std": 0.21374104022979737, "signal/brier_reward/centered_abs_mean": 0.2464316189289093, "signal/brier_reward/group_std_mean": 0.30184549689292905, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02464316114783287, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.02464316114783287, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.12081145346164704, "signal/confidence_uniqueness_reward/group_std_mean": 0.15997391939163208, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.01208114568144083, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.01208114568144083, "signal/format_reward/centered_abs_mean": 0.04210205078125, "signal/format_reward/group_std_mean": 0.09824754893779755, "signal/format_reward/group_zero_std_frac": 0.53125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.021051025390625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.021051025390625, "signal/frontier_aurc_reward/centered_abs_mean": 0.004936764482408762, "signal/frontier_aurc_reward/group_std_mean": 0.006853995472192764, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.17095582128968e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.17095582128968e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.13821439445018768, "signal/frontier_ece_reward/group_std_mean": 0.16570349037647247, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.013821440003812312, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.013821440003812312, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.1694903701543808, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3000731647014618, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.06875, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.016949037089943886, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.016949037089943886, "signal/volume_coverage_0/centered_abs_mean": 3.4365328471785974e-09, "signal/volume_coverage_0/group_std_mean": 5.13227842446895e-09, "signal/volume_coverage_0/group_zero_std_frac": 0.903125, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 3.4365328083207915e-10, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 3.4365328083207915e-10, "signal/volume_coverage_1/centered_abs_mean": 3.4365328471785974e-09, "signal/volume_coverage_1/group_std_mean": 5.13227842446895e-09, "signal/volume_coverage_1/group_zero_std_frac": 0.903125, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 3.4365328083207915e-10, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 3.4365328083207915e-10, "signal/volume_coverage_10/centered_abs_mean": 3.4365328471785974e-09, "signal/volume_coverage_10/group_std_mean": 5.13227842446895e-09, "signal/volume_coverage_10/group_zero_std_frac": 0.903125, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 3.4365328083207915e-10, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 3.4365328083207915e-10, "signal/volume_coverage_15/centered_abs_mean": 7.97853836442819e-08, "signal/volume_coverage_15/group_std_mean": 1.1836238478224459e-07, "signal/volume_coverage_15/group_zero_std_frac": 0.903125, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 7.978538937858381e-09, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 7.978538937858381e-09, "signal/volume_coverage_20/centered_abs_mean": 1.0703116398724788e-07, "signal/volume_coverage_20/group_std_mean": 1.584521130748584e-07, "signal/volume_coverage_20/group_zero_std_frac": 0.803125, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 1.0703117145349773e-08, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 1.0703117145349773e-08, "signal/volume_coverage_25/centered_abs_mean": 2.0704597146758986e-07, "signal/volume_coverage_25/group_std_mean": 3.073400936637327e-07, "signal/volume_coverage_25/group_zero_std_frac": 0.7, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 2.0704598141518814e-08, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 2.0704598141518814e-08, "signal/volume_coverage_5/centered_abs_mean": 3.4365328471785974e-09, "signal/volume_coverage_5/group_std_mean": 5.13227842446895e-09, "signal/volume_coverage_5/group_zero_std_frac": 0.903125, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 3.4365328083207915e-10, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 3.4365328083207915e-10, "step": 20 }, { "calibration/aurc": 0.637877780884015, "calibration/batch_distribution_entropy": 0.8666604687716237, "calibration/buffer_distribution_entropy": 0.7172808030818212, "calibration/confidence_entropy": 0.4574993969776974, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.3524438351727639, "calibration/mean_confidence": 0.650396233605561, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0021484375, "completions/max_length": 983.2, "completions/max_terminated_length": 983.2, "completions/mean_length": 112.5263671875, "completions/mean_terminated_length": 112.76943817138672, "completions/min_length": 0.0, "completions/min_terminated_length": 35.4, "epoch": 0.08, "grad_norm": 0.00510385213419795, "learning_rate": 1e-06, "loss": -0.0031, "num_tokens": 83378024.0, "reward": 0.7345107555389404, "reward_std": 0.17090575098991395, "rewards/accuracy_reward": 0.35263671875, "rewards/brier_reward": 0.6509084701538086, "rewards/confidence_uniqueness_reward": 0.8421475172042847, "rewards/format_reward": 0.99287109375, "rewards/frontier_aurc_reward": -0.005880103260278702, "rewards/frontier_ece_reward": -0.03344872035086155, "rewards/frontier_entropy_batch_reward": -0.841304075717926, "rewards/volume_coverage_0": 1.3084958672138214e-10, "rewards/volume_coverage_1": 1.3084958672138214e-10, "rewards/volume_coverage_10": 3.28643133817863e-09, "rewards/volume_coverage_15": 3.28643133817863e-09, "rewards/volume_coverage_20": 1.2797271364828333e-08, "rewards/volume_coverage_25": 2.003043941045668e-07, "rewards/volume_coverage_5": 1.3084958672138214e-10, "signal/accuracy_reward/centered_abs_mean": 0.191363525390625, "signal/accuracy_reward/group_std_mean": 0.23868935704231262, "signal/accuracy_reward/group_zero_std_frac": 0.378125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0956817626953125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0956817626953125, "signal/advantage_abs_mean": 0.13158616423606873, "signal/advantage_pre_scale_abs_mean": 0.13158616423606873, "signal/advantage_pre_scale_std": 0.1908570796251297, "signal/advantage_std": 0.1908570796251297, "signal/brier_reward/centered_abs_mean": 0.23046765923500062, "signal/brier_reward/group_std_mean": 0.28209164142608645, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.023046765848994254, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.023046765848994254, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.07654989808797837, "signal/confidence_uniqueness_reward/group_std_mean": 0.10619462579488755, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007654989883303642, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007654989883303642, "signal/format_reward/centered_abs_mean": 0.013677978515625, "signal/format_reward/group_std_mean": 0.03662779070436954, "signal/format_reward/group_zero_std_frac": 0.80625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0068389892578125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0068389892578125, "signal/frontier_aurc_reward/centered_abs_mean": 0.003424457693472505, "signal/frontier_aurc_reward/group_std_mean": 0.005068050231784582, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.2805721750482914e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.2805721750482914e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.11674794852733612, "signal/frontier_ece_reward/group_std_mean": 0.1464279443025589, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.011674795113503934, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.011674795113503934, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2606357991695404, "signal/frontier_entropy_batch_reward/group_std_mean": 0.399100261926651, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.021875, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02606358118355274, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02606358118355274, "signal/volume_coverage_0/centered_abs_mean": 3.6999710295582134e-09, "signal/volume_coverage_0/group_std_mean": 5.391393764764274e-09, "signal/volume_coverage_0/group_zero_std_frac": 0.909375, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 3.699970829718069e-10, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 3.699970829718069e-10, "signal/volume_coverage_1/centered_abs_mean": 3.6999710295582134e-09, "signal/volume_coverage_1/group_std_mean": 5.391393764764274e-09, "signal/volume_coverage_1/group_zero_std_frac": 0.909375, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 3.699970829718069e-10, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 3.699970829718069e-10, "signal/volume_coverage_10/centered_abs_mean": 1.4863867259240492e-08, "signal/volume_coverage_10/group_std_mean": 2.1319809062614327e-08, "signal/volume_coverage_10/group_zero_std_frac": 0.859375, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 1.4863867053849233e-09, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 1.4863867053849233e-09, "signal/volume_coverage_15/centered_abs_mean": 1.4863867259240492e-08, "signal/volume_coverage_15/group_std_mean": 2.1319809062614327e-08, "signal/volume_coverage_15/group_zero_std_frac": 0.859375, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 1.4863867053849233e-09, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 1.4863867053849233e-09, "signal/volume_coverage_20/centered_abs_mean": 4.194976913618476e-08, "signal/volume_coverage_20/group_std_mean": 6.138168572311642e-08, "signal/volume_coverage_20/group_zero_std_frac": 0.809375, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 4.1949774792771065e-09, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 4.1949774792771065e-09, "signal/volume_coverage_25/centered_abs_mean": 3.951772380994534e-07, "signal/volume_coverage_25/group_std_mean": 5.592273702115236e-07, "signal/volume_coverage_25/group_zero_std_frac": 0.68125, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 3.951772487575944e-08, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 3.951772487575944e-08, "signal/volume_coverage_5/centered_abs_mean": 3.6999710295582134e-09, "signal/volume_coverage_5/group_std_mean": 5.391393764764274e-09, "signal/volume_coverage_5/group_zero_std_frac": 0.909375, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 3.699970829718069e-10, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 3.699970829718069e-10, "step": 25 }, { "calibration/aurc": 0.6324060218490095, "calibration/batch_distribution_entropy": 0.9494056926566277, "calibration/buffer_distribution_entropy": 0.7762817425645073, "calibration/confidence_entropy": 0.5202123272314586, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.27552858640012845, "calibration/mean_confidence": 0.5462257194993483, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00185546875, "completions/max_length": 667.0, "completions/max_terminated_length": 667.0, "completions/mean_length": 106.41845703125, "completions/mean_terminated_length": 106.6145523071289, "completions/min_length": 0.0, "completions/min_terminated_length": 33.8, "epoch": 0.096, "grad_norm": 0.004249365534633398, "learning_rate": 1e-06, "loss": -0.0011, "num_tokens": 99512357.0, "reward": 0.7657663106918335, "reward_std": 0.16878970265388488, "rewards/accuracy_reward": 0.35322265625, "rewards/brier_reward": 0.6870332479476928, "rewards/confidence_uniqueness_reward": 0.9030344367027283, "rewards/format_reward": 0.994140625, "rewards/frontier_aurc_reward": -0.005292004905641079, "rewards/frontier_ece_reward": -0.026083091273903847, "rewards/frontier_entropy_batch_reward": -0.6424768328666687, "rewards/volume_coverage_0": 1.1482879369584253e-09, "rewards/volume_coverage_1": 1.1482879369584253e-09, "rewards/volume_coverage_10": 1.1152872342323761e-08, "rewards/volume_coverage_15": 7.12950278647373e-08, "rewards/volume_coverage_20": 9.590478232124333e-08, "rewards/volume_coverage_25": 1.634254282123493e-07, "rewards/volume_coverage_5": 1.1482879369584253e-09, "signal/accuracy_reward/centered_abs_mean": 0.191375732421875, "signal/accuracy_reward/group_std_mean": 0.24138614535331726, "signal/accuracy_reward/group_zero_std_frac": 0.3625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0956878662109375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0956878662109375, "signal/advantage_abs_mean": 0.12984309047460557, "signal/advantage_pre_scale_abs_mean": 0.12984309047460557, "signal/advantage_pre_scale_std": 0.18535825312137605, "signal/advantage_std": 0.18535825312137605, "signal/brier_reward/centered_abs_mean": 0.2213761627674103, "signal/brier_reward/group_std_mean": 0.2727460443973541, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02213761620223522, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.02213761620223522, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0604395791888237, "signal/confidence_uniqueness_reward/group_std_mean": 0.08873669505119323, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0060439580120146275, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0060439580120146275, "signal/format_reward/centered_abs_mean": 0.01134033203125, "signal/format_reward/group_std_mean": 0.03280932120978832, "signal/format_reward/group_zero_std_frac": 0.815625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.005670166015625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.005670166015625, "signal/frontier_aurc_reward/centered_abs_mean": 0.0026173558086156844, "signal/frontier_aurc_reward/group_std_mean": 0.004071610467508435, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.271694804425351e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.271694804425351e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.09653272926807403, "signal/frontier_ece_reward/group_std_mean": 0.1304735615849495, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.009653273224830627, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.009653273224830627, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.4387050747871399, "signal/frontier_entropy_batch_reward/group_std_mean": 0.5275961101055145, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.043870508670806885, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.043870508670806885, "signal/volume_coverage_0/centered_abs_mean": 2.9389548905633945e-09, "signal/volume_coverage_0/group_std_mean": 3.83164269202041e-09, "signal/volume_coverage_0/group_zero_std_frac": 0.875, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 2.938955051545733e-10, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 2.938955051545733e-10, "signal/volume_coverage_1/centered_abs_mean": 2.9389548905633945e-09, "signal/volume_coverage_1/group_std_mean": 3.83164269202041e-09, "signal/volume_coverage_1/group_zero_std_frac": 0.875, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 2.938955051545733e-10, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 2.938955051545733e-10, "signal/volume_coverage_10/centered_abs_mean": 2.317042134825087e-08, "signal/volume_coverage_10/group_std_mean": 3.1277936196616454e-08, "signal/volume_coverage_10/group_zero_std_frac": 0.784375, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 2.317042160915328e-09, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 2.317042160915328e-09, "signal/volume_coverage_15/centered_abs_mean": 8.99444128843463e-08, "signal/volume_coverage_15/group_std_mean": 1.1935079364810973e-07, "signal/volume_coverage_15/group_zero_std_frac": 0.734375, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 8.994441605403302e-09, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 8.994441605403302e-09, "signal/volume_coverage_20/centered_abs_mean": 1.1986816018660916e-07, "signal/volume_coverage_20/group_std_mean": 1.592895981783471e-07, "signal/volume_coverage_20/group_zero_std_frac": 0.684375, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 1.1986816453313231e-08, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 1.1986816453313231e-08, "signal/volume_coverage_25/centered_abs_mean": 2.4859488831907586e-07, "signal/volume_coverage_25/group_std_mean": 3.3005767701155265e-07, "signal/volume_coverage_25/group_zero_std_frac": 0.534375, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 2.485949108788077e-08, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 2.485949108788077e-08, "signal/volume_coverage_5/centered_abs_mean": 2.9389548905633945e-09, "signal/volume_coverage_5/group_std_mean": 3.83164269202041e-09, "signal/volume_coverage_5/group_zero_std_frac": 0.875, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 2.938955051545733e-10, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 2.938955051545733e-10, "step": 30 }, { "calibration/aurc": 0.5107678081003135, "calibration/batch_distribution_entropy": 0.9589194598555851, "calibration/buffer_distribution_entropy": 0.8460931740982854, "calibration/confidence_entropy": 0.5240642540622189, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.003137254901960784, "calibration/coverage@20%": 0.00392156862745098, "calibration/coverage@25%": 0.022745098039215685, "calibration/coverage@30%": 0.02627450980392157, "calibration/coverage@5%": 0.0, "calibration/ece": 0.17553510974376438, "calibration/mean_confidence": 0.42006594777913764, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00302734375, "completions/max_length": 603.0, "completions/max_terminated_length": 603.0, "completions/mean_length": 100.521484375, "completions/mean_terminated_length": 100.82618560791016, "completions/min_length": 0.0, "completions/min_terminated_length": 36.4, "epoch": 0.112, "grad_norm": 0.009812681004405022, "learning_rate": 1e-06, "loss": -0.0036, "num_tokens": 115651169.0, "reward": 0.8234237790107727, "reward_std": 0.15335985720157624, "rewards/accuracy_reward": 0.3880859375, "rewards/brier_reward": 0.7183116436004638, "rewards/confidence_uniqueness_reward": 0.9424231290817261, "rewards/format_reward": 0.99404296875, "rewards/frontier_aurc_reward": -0.004633870534598827, "rewards/frontier_ece_reward": -0.005527885630726814, "rewards/frontier_entropy_batch_reward": -0.3310348570346832, "rewards/volume_coverage_0": 7.422226350195515e-09, "rewards/volume_coverage_1": 7.422226350195515e-09, "rewards/volume_coverage_10": 1.2741683225181433e-08, "rewards/volume_coverage_15": 2.8442976557352395e-08, "rewards/volume_coverage_20": 3.798906951790748e-08, "rewards/volume_coverage_25": 8.618370428981059e-08, "rewards/volume_coverage_5": 7.422226350195515e-09, "signal/accuracy_reward/centered_abs_mean": 0.1885009765625, "signal/accuracy_reward/group_std_mean": 0.23862674236297607, "signal/accuracy_reward/group_zero_std_frac": 0.3625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09425048828125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09425048828125, "signal/advantage_abs_mean": 0.11734064370393753, "signal/advantage_pre_scale_abs_mean": 0.11734064370393753, "signal/advantage_pre_scale_std": 0.17148884534835815, "signal/advantage_std": 0.17148884534835815, "signal/brier_reward/centered_abs_mean": 0.20886878669261932, "signal/brier_reward/group_std_mean": 0.25930328369140626, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020886879414319992, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.020886879414319992, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.031046908348798752, "signal/confidence_uniqueness_reward/group_std_mean": 0.055822306871414186, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0031046907417476175, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0031046907417476175, "signal/format_reward/centered_abs_mean": 0.011505126953125, "signal/format_reward/group_std_mean": 0.03268913105130196, "signal/format_reward/group_zero_std_frac": 0.81875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0057525634765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0057525634765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.0017553700832650065, "signal/frontier_aurc_reward/group_std_mean": 0.002825619326904416, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.194212611357216e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.194212611357216e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.06550199910998344, "signal/frontier_ece_reward/group_std_mean": 0.09871184825897217, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006550200004130602, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006550200004130602, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.41493695974349976, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4780768632888794, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.041493697464466094, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.041493697464466094, "signal/volume_coverage_0/centered_abs_mean": 1.0677126127678349e-08, "signal/volume_coverage_0/group_std_mean": 1.309429287221775e-08, "signal/volume_coverage_0/group_zero_std_frac": 0.85, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 1.0677125621139093e-09, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 1.0677125621139093e-09, "signal/volume_coverage_1/centered_abs_mean": 1.0677126127678349e-08, "signal/volume_coverage_1/group_std_mean": 1.309429287221775e-08, "signal/volume_coverage_1/group_zero_std_frac": 0.85, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 1.0677125621139093e-09, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 1.0677125621139093e-09, "signal/volume_coverage_10/centered_abs_mean": 1.7244384153958236e-08, "signal/volume_coverage_10/group_std_mean": 2.1109557413190318e-08, "signal/volume_coverage_10/group_zero_std_frac": 0.85, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 1.7244385246140138e-09, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 1.7244385246140138e-09, "signal/volume_coverage_15/centered_abs_mean": 4.001330921044044e-08, "signal/volume_coverage_15/group_std_mean": 4.904761328194951e-08, "signal/volume_coverage_15/group_zero_std_frac": 0.846875, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 4.001331171260558e-09, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 4.001331171260558e-09, "signal/volume_coverage_20/centered_abs_mean": 6.514198828133643e-08, "signal/volume_coverage_20/group_std_mean": 8.038617682615268e-08, "signal/volume_coverage_20/group_zero_std_frac": 0.746875, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 6.514199287627198e-09, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 6.514199287627198e-09, "signal/volume_coverage_25/centered_abs_mean": 1.9327816443981405e-07, "signal/volume_coverage_25/group_std_mean": 2.4028904537232166e-07, "signal/volume_coverage_25/group_zero_std_frac": 0.746875, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 1.9327816443842628e-08, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 1.9327816443842628e-08, "signal/volume_coverage_5/centered_abs_mean": 1.0677126127678349e-08, "signal/volume_coverage_5/group_std_mean": 1.309429287221775e-08, "signal/volume_coverage_5/group_zero_std_frac": 0.85, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 1.0677125621139093e-09, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 1.0677125621139093e-09, "step": 35 }, { "calibration/aurc": 0.5580233719668242, "calibration/batch_distribution_entropy": 0.9514386602270696, "calibration/buffer_distribution_entropy": 0.9009901987870197, "calibration/confidence_entropy": 0.5205907217133923, "calibration/coverage@0%": 0.0019700600666023465, "calibration/coverage@1%": 0.0019700600666023465, "calibration/coverage@10%": 0.0019700600666023465, "calibration/coverage@15%": 0.0019700600666023465, "calibration/coverage@20%": 0.01103295882481093, "calibration/coverage@25%": 0.023240795493448697, "calibration/coverage@30%": 0.03229591360368491, "calibration/coverage@5%": 0.0019700600666023465, "calibration/ece": 0.18875669542512402, "calibration/mean_confidence": 0.39032947552856273, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00224609375, "completions/max_length": 603.8, "completions/max_terminated_length": 603.8, "completions/mean_length": 104.0462890625, "completions/mean_terminated_length": 104.28194122314453, "completions/min_length": 0.0, "completions/min_terminated_length": 41.0, "epoch": 0.128, "grad_norm": 0.007568053435534239, "learning_rate": 1e-06, "loss": -0.0021, "num_tokens": 131633275.0, "reward": 0.8242883682250977, "reward_std": 0.13872416019439698, "rewards/accuracy_reward": 0.3837890625, "rewards/brier_reward": 0.7208372712135315, "rewards/confidence_uniqueness_reward": 0.9432681918144226, "rewards/format_reward": 0.99482421875, "rewards/frontier_aurc_reward": -0.004502659384161234, "rewards/frontier_ece_reward": -0.0009349806932732463, "rewards/frontier_entropy_batch_reward": -0.3127904772758484, "rewards/volume_coverage_0": 3.823643576761349e-09, "rewards/volume_coverage_1": 3.823643576761349e-09, "rewards/volume_coverage_10": 2.2619500181231268e-08, "rewards/volume_coverage_15": 4.996604976159613e-08, "rewards/volume_coverage_20": 5.745271669244101e-08, "rewards/volume_coverage_25": 7.44545543190922e-08, "rewards/volume_coverage_5": 3.823643576761349e-09, "signal/accuracy_reward/centered_abs_mean": 0.16480712890625, "signal/accuracy_reward/group_std_mean": 0.21133655607700347, "signal/accuracy_reward/group_zero_std_frac": 0.4125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.082403564453125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.082403564453125, "signal/advantage_abs_mean": 0.10589775294065476, "signal/advantage_pre_scale_abs_mean": 0.10589775294065476, "signal/advantage_pre_scale_std": 0.15886488556861877, "signal/advantage_std": 0.15886488556861877, "signal/brier_reward/centered_abs_mean": 0.20160384476184845, "signal/brier_reward/group_std_mean": 0.25164816081523894, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020160384848713873, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.020160384848713873, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.024899404495954514, "signal/confidence_uniqueness_reward/group_std_mean": 0.046268679201602936, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0024899405427277086, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0024899405427277086, "signal/format_reward/centered_abs_mean": 0.010015869140625, "signal/format_reward/group_std_mean": 0.028942330926656722, "signal/format_reward/group_zero_std_frac": 0.8375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0050079345703125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0050079345703125, "signal/frontier_aurc_reward/centered_abs_mean": 0.001641789567656815, "signal/frontier_aurc_reward/group_std_mean": 0.0024957799818366767, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.052236923191231e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.052236923191231e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.05462209209799766, "signal/frontier_ece_reward/group_std_mean": 0.08531963378190995, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005462209228426218, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005462209228426218, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3865876078605652, "signal/frontier_entropy_batch_reward/group_std_mean": 0.45401414632797243, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.038658761978149415, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.038658761978149415, "signal/volume_coverage_0/centered_abs_mean": 5.4679469368990045e-09, "signal/volume_coverage_0/group_std_mean": 6.669823321425738e-09, "signal/volume_coverage_0/group_zero_std_frac": 0.8875, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 5.467947296333708e-10, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 5.467947296333708e-10, "signal/volume_coverage_1/centered_abs_mean": 5.4679469368990045e-09, "signal/volume_coverage_1/group_std_mean": 6.669823321425738e-09, "signal/volume_coverage_1/group_zero_std_frac": 0.8875, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 5.467947296333708e-10, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 5.467947296333708e-10, "signal/volume_coverage_10/centered_abs_mean": 2.886184602146624e-08, "signal/volume_coverage_10/group_std_mean": 3.515927660679985e-08, "signal/volume_coverage_10/group_zero_std_frac": 0.8375, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 2.886184567035821e-09, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 2.886184567035821e-09, "signal/volume_coverage_15/centered_abs_mean": 6.196580612793934e-08, "signal/volume_coverage_15/group_std_mean": 7.554549857347049e-08, "signal/volume_coverage_15/group_zero_std_frac": 0.7875, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 6.196581029543902e-09, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 6.196581029543902e-09, "signal/volume_coverage_20/centered_abs_mean": 7.125139318275853e-08, "signal/volume_coverage_20/group_std_mean": 8.68546331800335e-08, "signal/volume_coverage_20/group_zero_std_frac": 0.7875, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 7.125139948188642e-09, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 7.125139948188642e-09, "signal/volume_coverage_25/centered_abs_mean": 9.226549457508338e-08, "signal/volume_coverage_25/group_std_mean": 1.124894589976666e-07, "signal/volume_coverage_25/group_zero_std_frac": 0.7, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 9.226549527868722e-09, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 9.226549527868722e-09, "signal/volume_coverage_5/centered_abs_mean": 5.4679469368990045e-09, "signal/volume_coverage_5/group_std_mean": 6.669823321425738e-09, "signal/volume_coverage_5/group_zero_std_frac": 0.8875, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 5.467947296333708e-10, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 5.467947296333708e-10, "step": 40 }, { "calibration/aurc": 0.45307609323828046, "calibration/batch_distribution_entropy": 0.9821801695393473, "calibration/buffer_distribution_entropy": 0.9315210180147012, "calibration/confidence_entropy": 0.5304064561224848, "calibration/coverage@0%": 0.0007820144324853229, "calibration/coverage@1%": 0.0007820144324853229, "calibration/coverage@10%": 0.0007820144324853229, "calibration/coverage@15%": 0.0007820144324853229, "calibration/coverage@20%": 0.053125764432485324, "calibration/coverage@25%": 0.08515701443248533, "calibration/coverage@30%": 0.1871101394324853, "calibration/coverage@5%": 0.0007820144324853229, "calibration/ece": 0.2287805560797294, "calibration/mean_confidence": 0.4682809404281139, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0013671875, "completions/max_length": 609.6, "completions/max_terminated_length": 609.6, "completions/mean_length": 105.6619140625, "completions/mean_terminated_length": 105.80623168945313, "completions/min_length": 0.0, "completions/min_terminated_length": 41.2, "epoch": 0.144, "grad_norm": 0.0022722198627889156, "learning_rate": 1e-06, "loss": -0.0007, "num_tokens": 147665685.0, "reward": 0.8855258345603942, "reward_std": 0.13922611474990845, "rewards/accuracy_reward": 0.484765625, "rewards/brier_reward": 0.6958995342254639, "rewards/confidence_uniqueness_reward": 0.9512767195701599, "rewards/format_reward": 0.99755859375, "rewards/frontier_aurc_reward": -0.00416339784860611, "rewards/frontier_ece_reward": 0.001959370821714401, "rewards/frontier_entropy_batch_reward": -0.20497798323631286, "rewards/volume_coverage_0": 8.200039328110087e-11, "rewards/volume_coverage_1": 8.200039328110087e-11, "rewards/volume_coverage_10": -5.721240280143203e-10, "rewards/volume_coverage_15": -5.721240280143203e-10, "rewards/volume_coverage_20": -4.1206314610464643e-10, "rewards/volume_coverage_25": -5.415158502164452e-10, "rewards/volume_coverage_5": 8.200039328110087e-11, "signal/accuracy_reward/centered_abs_mean": 0.17095947265625, "signal/accuracy_reward/group_std_mean": 0.22554005682468414, "signal/accuracy_reward/group_zero_std_frac": 0.35625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.085479736328125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.085479736328125, "signal/advantage_abs_mean": 0.10604511946439743, "signal/advantage_pre_scale_abs_mean": 0.10604511946439743, "signal/advantage_pre_scale_std": 0.15817178189754486, "signal/advantage_std": 0.15817178189754486, "signal/brier_reward/centered_abs_mean": 0.21632620096206664, "signal/brier_reward/group_std_mean": 0.263543963432312, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021632620692253114, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.021632620692253114, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.017246781662106515, "signal/confidence_uniqueness_reward/group_std_mean": 0.028721674531698226, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0017246782314032315, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0017246782314032315, "signal/format_reward/centered_abs_mean": 0.004718017578125, "signal/format_reward/group_std_mean": 0.01347437030635774, "signal/format_reward/group_zero_std_frac": 0.925, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0023590087890625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0023590087890625, "signal/frontier_aurc_reward/centered_abs_mean": 0.0021471860352903606, "signal/frontier_aurc_reward/group_std_mean": 0.003108612261712551, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.6839824204216712e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.6839824204216712e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.06187872663140297, "signal/frontier_ece_reward/group_std_mean": 0.0878511056303978, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006187872681766749, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006187872681766749, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.293925142288208, "signal/frontier_entropy_batch_reward/group_std_mean": 0.37315127849578855, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0293925154954195, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0293925154954195, "signal/volume_coverage_0/centered_abs_mean": 6.908986494025271e-10, "signal/volume_coverage_0/group_std_mean": 8.69764738009593e-10, "signal/volume_coverage_0/group_zero_std_frac": 0.9875, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 6.908987236486918e-11, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 6.908987236486918e-11, "signal/volume_coverage_1/centered_abs_mean": 6.908986494025271e-10, "signal/volume_coverage_1/group_std_mean": 8.69764738009593e-10, "signal/volume_coverage_1/group_zero_std_frac": 0.9875, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 6.908987236486918e-11, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 6.908987236486918e-11, "signal/volume_coverage_10/centered_abs_mean": 5.539405401044917e-09, "signal/volume_coverage_10/group_std_mean": 7.0430595022763495e-09, "signal/volume_coverage_10/group_zero_std_frac": 0.9375, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 5.539405042304102e-10, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 5.539405042304102e-10, "signal/volume_coverage_15/centered_abs_mean": 5.539405401044917e-09, "signal/volume_coverage_15/group_std_mean": 7.0430595022763495e-09, "signal/volume_coverage_15/group_zero_std_frac": 0.9375, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 5.539405042304102e-10, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 5.539405042304102e-10, "signal/volume_coverage_20/centered_abs_mean": 1.066335202914992e-08, "signal/volume_coverage_20/group_std_mean": 1.3604591753146878e-08, "signal/volume_coverage_20/group_zero_std_frac": 0.7875, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 1.0663351941025968e-09, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 1.0663351941025968e-09, "signal/volume_coverage_25/centered_abs_mean": 1.540027541602207e-08, "signal/volume_coverage_25/group_std_mean": 1.962254233545124e-08, "signal/volume_coverage_25/group_zero_std_frac": 0.75, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 1.5400275683169485e-09, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 1.5400275683169485e-09, "signal/volume_coverage_5/centered_abs_mean": 6.908986494025271e-10, "signal/volume_coverage_5/group_std_mean": 8.69764738009593e-10, "signal/volume_coverage_5/group_zero_std_frac": 0.9875, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 6.908987236486918e-11, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 6.908987236486918e-11, "step": 45 }, { "calibration/aurc": 0.5383383939508992, "calibration/batch_distribution_entropy": 0.9907718482645086, "calibration/buffer_distribution_entropy": 0.9470614664673189, "calibration/confidence_entropy": 0.5212315823406769, "calibration/coverage@0%": 0.000392156862745098, "calibration/coverage@1%": 0.000392156862745098, "calibration/coverage@10%": 0.000392156862745098, "calibration/coverage@15%": 0.000392156862745098, "calibration/coverage@20%": 0.000392156862745098, "calibration/coverage@25%": 0.0019577145926863897, "calibration/coverage@30%": 0.0019577145926863897, "calibration/coverage@5%": 0.000392156862745098, "calibration/ece": 0.21815604108163839, "calibration/mean_confidence": 0.5193597284981454, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0017578125, "completions/max_length": 911.6, "completions/max_terminated_length": 911.6, "completions/mean_length": 107.3876953125, "completions/mean_terminated_length": 107.57828369140626, "completions/min_length": 0.0, "completions/min_terminated_length": 42.6, "epoch": 0.16, "grad_norm": 0.0022806006018072367, "learning_rate": 1e-06, "loss": -0.0024, "num_tokens": 163786263.0, "reward": 0.866922116279602, "reward_std": 0.1387272745370865, "rewards/accuracy_reward": 0.4404296875, "rewards/brier_reward": 0.6948660254478455, "rewards/confidence_uniqueness_reward": 0.9550655364990235, "rewards/format_reward": 0.99716796875, "rewards/frontier_aurc_reward": -0.004550308641046286, "rewards/frontier_ece_reward": -0.00014091167831793429, "rewards/frontier_entropy_batch_reward": -0.16798928380012512, "rewards/volume_coverage_0": -2.5247680335294122e-11, "rewards/volume_coverage_1": -2.5247680335294122e-11, "rewards/volume_coverage_10": -8.908247889349851e-11, "rewards/volume_coverage_15": 4.961911284628861e-10, "rewards/volume_coverage_20": 4.287086199927792e-10, "rewards/volume_coverage_25": 5.975818465064897e-10, "rewards/volume_coverage_5": -2.5247680335294122e-11, "signal/accuracy_reward/centered_abs_mean": 0.16611328125, "signal/accuracy_reward/group_std_mean": 0.21109898686408995, "signal/accuracy_reward/group_zero_std_frac": 0.425, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.083056640625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.083056640625, "signal/advantage_abs_mean": 0.10861865431070328, "signal/advantage_pre_scale_abs_mean": 0.10861865431070328, "signal/advantage_pre_scale_std": 0.16101040244102477, "signal/advantage_std": 0.16101040244102477, "signal/brier_reward/centered_abs_mean": 0.22490673661231994, "signal/brier_reward/group_std_mean": 0.2718587577342987, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.022490674629807472, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.022490674629807472, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.015694119967520237, "signal/confidence_uniqueness_reward/group_std_mean": 0.026635773852467536, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001569412089884281, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001569412089884281, "signal/format_reward/centered_abs_mean": 0.005352783203125, "signal/format_reward/group_std_mean": 0.014074762351810932, "signal/format_reward/group_zero_std_frac": 0.925, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0026763916015625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0026763916015625, "signal/frontier_aurc_reward/centered_abs_mean": 0.002780256886035204, "signal/frontier_aurc_reward/group_std_mean": 0.003884653048589826, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.475321209407411e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.475321209407411e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.0696753516793251, "signal/frontier_ece_reward/group_std_mean": 0.09413132518529892, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0069675354287028314, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0069675354287028314, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2549823522567749, "signal/frontier_entropy_batch_reward/group_std_mean": 0.33368061780929564, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02549823671579361, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02549823671579361, "signal/volume_coverage_0/centered_abs_mean": 1.649701220074462e-09, "signal/volume_coverage_0/group_std_mean": 2.0688173063554415e-09, "signal/volume_coverage_0/group_zero_std_frac": 0.95, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 1.6497011742777623e-10, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 1.6497011742777623e-10, "signal/volume_coverage_1/centered_abs_mean": 1.649701220074462e-09, "signal/volume_coverage_1/group_std_mean": 2.0688173063554415e-09, "signal/volume_coverage_1/group_zero_std_frac": 0.95, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 1.6497011742777623e-10, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 1.6497011742777623e-10, "signal/volume_coverage_10/centered_abs_mean": 4.160429378785579e-09, "signal/volume_coverage_10/group_std_mean": 5.213142106885727e-09, "signal/volume_coverage_10/group_zero_std_frac": 0.95, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 4.160429022126433e-10, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 4.160429022126433e-10, "signal/volume_coverage_15/centered_abs_mean": 8.538843809802187e-09, "signal/volume_coverage_15/group_std_mean": 1.0691787555305866e-08, "signal/volume_coverage_15/group_zero_std_frac": 0.9, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 8.538843764005488e-10, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 8.538843764005488e-10, "signal/volume_coverage_20/centered_abs_mean": 1.1193042093182726e-08, "signal/volume_coverage_20/group_std_mean": 1.4015788185606227e-08, "signal/volume_coverage_20/group_zero_std_frac": 0.9, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 1.1193041514478975e-09, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 1.1193041514478975e-09, "signal/volume_coverage_25/centered_abs_mean": 1.2908310909054422e-08, "signal/volume_coverage_25/group_std_mean": 1.6175339656587796e-08, "signal/volume_coverage_25/group_zero_std_frac": 0.8875, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 1.290830988626146e-09, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 1.290830988626146e-09, "signal/volume_coverage_5/centered_abs_mean": 1.649701220074462e-09, "signal/volume_coverage_5/group_std_mean": 2.0688173063554415e-09, "signal/volume_coverage_5/group_zero_std_frac": 0.95, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 1.6497011742777623e-10, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 1.6497011742777623e-10, "step": 50 }, { "epoch": 0.16, "eval_calibration/aurc": 0.6806264122050393, "eval_calibration/batch_distribution_entropy": 0.9356194614467648, "eval_calibration/buffer_distribution_entropy": 0.9527854156241127, "eval_calibration/confidence_entropy": 0.5131242003432436, "eval_calibration/coverage@0%": 0.0, "eval_calibration/coverage@1%": 0.0, "eval_calibration/coverage@10%": 0.0, "eval_calibration/coverage@15%": 0.0, "eval_calibration/coverage@20%": 0.0, "eval_calibration/coverage@25%": 0.0, "eval_calibration/coverage@30%": 0.0, "eval_calibration/coverage@5%": 0.0, "eval_calibration/ece": 0.36174847179112873, "eval_calibration/mean_confidence": 0.5375307711314923, "eval_completions/clipped_ratio": 0.001953125, "eval_completions/max_length": 500.25, "eval_completions/max_terminated_length": 500.25, "eval_completions/mean_length": 114.09334564208984, "eval_completions/mean_terminated_length": 114.30531311035156, "eval_completions/min_length": 38.75, "eval_completions/min_terminated_length": 52.25, "eval_loss": 0.0, "eval_num_tokens": 163786263.0, "eval_reward": 0.7351007908582687, "eval_reward_std": 0.25359319150447845, "eval_rewards/accuracy_reward": 0.357421875, "eval_rewards/brier_reward": 0.6816919445991516, "eval_rewards/confidence_uniqueness_reward": 0.9017924666404724, "eval_rewards/format_reward": 0.998046875, "eval_rewards/frontier_aurc_reward": -0.0053580960957333446, "eval_rewards/frontier_ece_reward": -0.011103931348770857, "eval_rewards/frontier_entropy_batch_reward": -0.998046875, "eval_rewards/volume_coverage_0": 1.9699982478638134e-09, "eval_rewards/volume_coverage_1": 1.9699982478638134e-09, "eval_rewards/volume_coverage_10": 8.171635479392592e-09, "eval_rewards/volume_coverage_15": 1.0906613390204711e-08, "eval_rewards/volume_coverage_20": 2.7413089020988934e-08, "eval_rewards/volume_coverage_25": 4.180572066303512e-08, "eval_rewards/volume_coverage_5": 1.9699982478638134e-09, "eval_runtime": 32.3428, "eval_samples_per_second": 15.459, "eval_signal/accuracy_reward/centered_abs_mean": 0.4473876953125, "eval_signal/accuracy_reward/group_std_mean": 0.47973204404115677, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.22369384765625, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.22369384765625, "eval_signal/advantage_abs_mean": 0.23301321640610695, "eval_signal/advantage_pre_scale_abs_mean": 0.23301321640610695, "eval_signal/advantage_pre_scale_std": 0.25110187008976936, "eval_signal/advantage_std": 0.25110187008976936, "eval_signal/brier_reward/centered_abs_mean": 0.24164819344878197, "eval_signal/brier_reward/group_std_mean": 0.2891644388437271, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02416481962427497, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.02416481962427497, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.03665702510625124, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.04689502716064453, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00366570265032351, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00366570265032351, "eval_signal/format_reward/centered_abs_mean": 0.0037841796875, "eval_signal/format_reward/group_std_mean": 0.011048543266952038, "eval_signal/format_reward/group_zero_std_frac": 0.9375, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.00189208984375, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.00189208984375, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.003946851065848023, "eval_signal/frontier_aurc_reward/group_std_mean": 0.005488026305101812, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.9335638323100284e-05, "eval_signal/frontier_aurc_reward/weight": 0.012500000186264515, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.9335638323100284e-05, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.07612928375601768, "eval_signal/frontier_ece_reward/group_std_mean": 0.10946918278932571, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.007612928398884833, "eval_signal/frontier_ece_reward/weight": 0.10000000149011612, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.007612928398884833, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0037841796875, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.011048543266952038, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.9375, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0003784179862122983, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0003784179862122983, "eval_signal/volume_coverage_0/centered_abs_mean": 6.782286787743175e-09, "eval_signal/volume_coverage_0/group_std_mean": 9.168166559270219e-09, "eval_signal/volume_coverage_0/group_zero_std_frac": 0.8125, "eval_signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 6.78228681549875e-10, "eval_signal/volume_coverage_0/weight": 0.10000000149011612, "eval_signal/volume_coverage_0/weighted_centered_abs_mean": 6.78228681549875e-10, "eval_signal/volume_coverage_1/centered_abs_mean": 6.782286787743175e-09, "eval_signal/volume_coverage_1/group_std_mean": 9.168166559270219e-09, "eval_signal/volume_coverage_1/group_zero_std_frac": 0.8125, "eval_signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 6.78228681549875e-10, "eval_signal/volume_coverage_1/weight": 0.10000000149011612, "eval_signal/volume_coverage_1/weighted_centered_abs_mean": 6.78228681549875e-10, "eval_signal/volume_coverage_10/centered_abs_mean": 2.1339159606004188e-08, "eval_signal/volume_coverage_10/group_std_mean": 2.773909602016289e-08, "eval_signal/volume_coverage_10/group_zero_std_frac": 0.75, "eval_signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 2.133916078561615e-09, "eval_signal/volume_coverage_10/weight": 0.10000000149011612, "eval_signal/volume_coverage_10/weighted_centered_abs_mean": 2.133916078561615e-09, "eval_signal/volume_coverage_15/centered_abs_mean": 2.9500576481655827e-08, "eval_signal/volume_coverage_15/group_std_mean": 3.8776015842678646e-08, "eval_signal/volume_coverage_15/group_zero_std_frac": 0.625, "eval_signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 2.950057677308937e-09, "eval_signal/volume_coverage_15/weight": 0.10000000149011612, "eval_signal/volume_coverage_15/weighted_centered_abs_mean": 2.950057677308937e-09, "eval_signal/volume_coverage_20/centered_abs_mean": 8.900886916407558e-08, "eval_signal/volume_coverage_20/group_std_mean": 1.1780899766833386e-07, "eval_signal/volume_coverage_20/group_zero_std_frac": 0.4375, "eval_signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 8.900887110696587e-09, "eval_signal/volume_coverage_20/weight": 0.10000000149011612, "eval_signal/volume_coverage_20/weighted_centered_abs_mean": 8.900887110696587e-09, "eval_signal/volume_coverage_25/centered_abs_mean": 1.415008363814252e-07, "eval_signal/volume_coverage_25/group_std_mean": 1.8812330493389595e-07, "eval_signal/volume_coverage_25/group_zero_std_frac": 0.375, "eval_signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 1.415008371585813e-08, "eval_signal/volume_coverage_25/weight": 0.10000000149011612, "eval_signal/volume_coverage_25/weighted_centered_abs_mean": 1.415008371585813e-08, "eval_signal/volume_coverage_5/centered_abs_mean": 6.782286787743175e-09, "eval_signal/volume_coverage_5/group_std_mean": 9.168166559270219e-09, "eval_signal/volume_coverage_5/group_zero_std_frac": 0.8125, "eval_signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 6.78228681549875e-10, "eval_signal/volume_coverage_5/weight": 0.10000000149011612, "eval_signal/volume_coverage_5/weighted_centered_abs_mean": 6.78228681549875e-10, "eval_steps_per_second": 0.124, "step": 50 }, { "calibration/aurc": 0.513497304567586, "calibration/batch_distribution_entropy": 0.9930806560395098, "calibration/buffer_distribution_entropy": 0.9559643505400857, "calibration/confidence_entropy": 0.5101240746310837, "calibration/coverage@0%": 0.00078125, "calibration/coverage@1%": 0.00078125, "calibration/coverage@10%": 0.00078125, "calibration/coverage@15%": 0.00078125, "calibration/coverage@20%": 0.00078125, "calibration/coverage@25%": 0.00078125, "calibration/coverage@30%": 0.00078125, "calibration/coverage@5%": 0.00078125, "calibration/ece": 0.23066903954651802, "calibration/mean_confidence": 0.5127820021193525, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00087890625, "completions/max_length": 934.2, "completions/max_terminated_length": 934.2, "completions/mean_length": 117.21943359375, "completions/mean_terminated_length": 117.32320404052734, "completions/min_length": 0.0, "completions/min_terminated_length": 45.8, "epoch": 0.176, "grad_norm": 0.001983851892873645, "learning_rate": 1e-06, "loss": -0.0012, "num_tokens": 180223710.0, "reward": 0.8638802170753479, "reward_std": 0.13377538919448853, "rewards/accuracy_reward": 0.43115234375, "rewards/brier_reward": 0.6899467229843139, "rewards/confidence_uniqueness_reward": 0.9573141932487488, "rewards/format_reward": 0.99873046875, "rewards/frontier_aurc_reward": -0.004610071796923876, "rewards/frontier_ece_reward": -0.002095718286000192, "rewards/frontier_entropy_batch_reward": -0.15520085394382477, "rewards/volume_coverage_0": 9.170957243620492e-10, "rewards/volume_coverage_1": 9.170957243620492e-10, "rewards/volume_coverage_10": 9.170957243620492e-10, "rewards/volume_coverage_15": 3.3474374161457377e-09, "rewards/volume_coverage_20": 4.455243285406141e-09, "rewards/volume_coverage_25": 1.2006583771650981e-08, "rewards/volume_coverage_5": 9.170957243620492e-10, "signal/accuracy_reward/centered_abs_mean": 0.159344482421875, "signal/accuracy_reward/group_std_mean": 0.20625897049903869, "signal/accuracy_reward/group_zero_std_frac": 0.425, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0796722412109375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0796722412109375, "signal/advantage_abs_mean": 0.10355194658041, "signal/advantage_pre_scale_abs_mean": 0.10355194658041, "signal/advantage_pre_scale_std": 0.1553642988204956, "signal/advantage_std": 0.1553642988204956, "signal/brier_reward/centered_abs_mean": 0.23099083304405213, "signal/brier_reward/group_std_mean": 0.27860647439956665, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.023099084198474885, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.023099084198474885, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.01309113372117281, "signal/confidence_uniqueness_reward/group_std_mean": 0.020391806587576868, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013091133907437325, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013091133907437325, "signal/format_reward/centered_abs_mean": 0.002459716796875, "signal/format_reward/group_std_mean": 0.007181552983820438, "signal/format_reward/group_zero_std_frac": 0.959375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0012298583984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0012298583984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0029003784526139497, "signal/frontier_aurc_reward/group_std_mean": 0.0041275909170508385, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.625473182182759e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.625473182182759e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.07209287136793137, "signal/frontier_ece_reward/group_std_mean": 0.09668067246675491, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.007209287490695715, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.007209287490695715, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.24166457653045653, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3223122775554657, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02416645921766758, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02416645921766758, "signal/volume_coverage_0/centered_abs_mean": 2.355328121428357e-09, "signal/volume_coverage_0/group_std_mean": 2.991094849580378e-09, "signal/volume_coverage_0/group_zero_std_frac": 0.940625, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 2.355328147796154e-10, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 2.355328147796154e-10, "signal/volume_coverage_1/centered_abs_mean": 2.355328121428357e-09, "signal/volume_coverage_1/group_std_mean": 2.991094849580378e-09, "signal/volume_coverage_1/group_zero_std_frac": 0.940625, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 2.355328147796154e-10, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 2.355328147796154e-10, "signal/volume_coverage_10/centered_abs_mean": 2.355328121428357e-09, "signal/volume_coverage_10/group_std_mean": 2.991094849580378e-09, "signal/volume_coverage_10/group_zero_std_frac": 0.940625, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 2.355328147796154e-10, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 2.355328147796154e-10, "signal/volume_coverage_15/centered_abs_mean": 1.483570165605741e-08, "signal/volume_coverage_15/group_std_mean": 1.9023410724461343e-08, "signal/volume_coverage_15/group_zero_std_frac": 0.890625, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 1.4835702936977225e-09, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 1.4835702936977225e-09, "signal/volume_coverage_20/centered_abs_mean": 1.9023089892211686e-08, "signal/volume_coverage_20/group_std_mean": 2.437677687483486e-08, "signal/volume_coverage_20/group_zero_std_frac": 0.765625, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 1.9023090734593405e-09, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 1.9023090734593405e-09, "signal/volume_coverage_25/centered_abs_mean": 4.2793483723269075e-08, "signal/volume_coverage_25/group_std_mean": 5.4967429463648186e-08, "signal/volume_coverage_25/group_zero_std_frac": 0.746875, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 4.279348483210432e-09, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 4.279348483210432e-09, "signal/volume_coverage_5/centered_abs_mean": 2.355328121428357e-09, "signal/volume_coverage_5/group_std_mean": 2.991094849580378e-09, "signal/volume_coverage_5/group_zero_std_frac": 0.940625, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 2.355328147796154e-10, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 2.355328147796154e-10, "step": 55 }, { "calibration/aurc": 0.43183402035041557, "calibration/batch_distribution_entropy": 0.9931826995129007, "calibration/buffer_distribution_entropy": 0.9634033378659635, "calibration/confidence_entropy": 0.5033983537357946, "calibration/coverage@0%": 0.004696673189823875, "calibration/coverage@1%": 0.004696673189823875, "calibration/coverage@10%": 0.006653620352250489, "calibration/coverage@15%": 0.006653620352250489, "calibration/coverage@20%": 0.011741682974559686, "calibration/coverage@25%": 0.04227005870841487, "calibration/coverage@30%": 0.050097847358121325, "calibration/coverage@5%": 0.004696673189823875, "calibration/ece": 0.17537742447414378, "calibration/mean_confidence": 0.47045836480735737, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.001171875, "completions/max_length": 660.6, "completions/max_terminated_length": 660.6, "completions/mean_length": 123.246875, "completions/mean_terminated_length": 123.39363555908203, "completions/min_length": 8.2, "completions/min_terminated_length": 47.8, "epoch": 0.192, "grad_norm": 0.001683655777014792, "learning_rate": 1e-06, "loss": -0.001, "num_tokens": 196300574.0, "reward": 0.888569688796997, "reward_std": 0.12694563418626786, "rewards/accuracy_reward": 0.47666015625, "rewards/brier_reward": 0.6981294512748718, "rewards/confidence_uniqueness_reward": 0.9559247612953186, "rewards/format_reward": 0.9984375, "rewards/frontier_aurc_reward": -0.004030398419126868, "rewards/frontier_ece_reward": 0.006262005632743239, "rewards/frontier_entropy_batch_reward": -0.14960378110408784, "rewards/volume_coverage_0": 3.1163899759378835e-11, "rewards/volume_coverage_1": 3.1163899759378835e-11, "rewards/volume_coverage_10": 3.1163899759378835e-11, "rewards/volume_coverage_15": 3.8662728279320604e-10, "rewards/volume_coverage_20": 5.279858298831708e-10, "rewards/volume_coverage_25": 9.922049041752312e-10, "rewards/volume_coverage_5": 3.1163899759378835e-11, "signal/accuracy_reward/centered_abs_mean": 0.146868896484375, "signal/accuracy_reward/group_std_mean": 0.1948981136083603, "signal/accuracy_reward/group_zero_std_frac": 0.4375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0734344482421875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0734344482421875, "signal/advantage_abs_mean": 0.09662168025970459, "signal/advantage_pre_scale_abs_mean": 0.09662168025970459, "signal/advantage_pre_scale_std": 0.14739495515823364, "signal/advantage_std": 0.14739495515823364, "signal/brier_reward/centered_abs_mean": 0.23383308947086334, "signal/brier_reward/group_std_mean": 0.28255713582038877, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.023383309692144395, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.023383309692144395, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.013910266570746899, "signal/confidence_uniqueness_reward/group_std_mean": 0.022051481157541276, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013910266570746899, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013910266570746899, "signal/format_reward/centered_abs_mean": 0.00301513671875, "signal/format_reward/group_std_mean": 0.008502526301890611, "signal/format_reward/group_zero_std_frac": 0.953125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.001507568359375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.001507568359375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0026706780306994917, "signal/frontier_aurc_reward/group_std_mean": 0.0038585856091231108, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.338347669341601e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.338347669341601e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.06825752407312394, "signal/frontier_ece_reward/group_std_mean": 0.09220706075429916, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006825752649456262, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006825752649456262, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.23939733803272248, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3223705470561981, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02393973395228386, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02393973395228386, "signal/volume_coverage_0/centered_abs_mean": 4.1413477647189725e-10, "signal/volume_coverage_0/group_std_mean": 5.290546389868922e-10, "signal/volume_coverage_0/group_zero_std_frac": 1.0, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 4.141347924313532e-11, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 4.141347924313532e-11, "signal/volume_coverage_1/centered_abs_mean": 4.1413477647189725e-10, "signal/volume_coverage_1/group_std_mean": 5.290546389868922e-10, "signal/volume_coverage_1/group_zero_std_frac": 1.0, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 4.141347924313532e-11, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 4.141347924313532e-11, "signal/volume_coverage_10/centered_abs_mean": 4.1413477647189725e-10, "signal/volume_coverage_10/group_std_mean": 5.290546389868922e-10, "signal/volume_coverage_10/group_zero_std_frac": 1.0, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 4.141347924313532e-11, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 4.141347924313532e-11, "signal/volume_coverage_15/centered_abs_mean": 2.613625393887986e-09, "signal/volume_coverage_15/group_std_mean": 3.2837682240050014e-09, "signal/volume_coverage_15/group_zero_std_frac": 0.903125, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 2.6136253682140785e-10, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 2.6136253682140785e-10, "signal/volume_coverage_20/centered_abs_mean": 3.437437523512443e-09, "signal/volume_coverage_20/group_std_mean": 4.3065619848015045e-09, "signal/volume_coverage_20/group_zero_std_frac": 0.903125, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 3.437437275793931e-10, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 3.437437275793931e-10, "signal/volume_coverage_25/centered_abs_mean": 5.901994704071711e-09, "signal/volume_coverage_25/group_std_mean": 7.4068437938556995e-09, "signal/volume_coverage_25/group_zero_std_frac": 0.846875, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 5.90199494485133e-10, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 5.90199494485133e-10, "signal/volume_coverage_5/centered_abs_mean": 4.1413477647189725e-10, "signal/volume_coverage_5/group_std_mean": 5.290546389868922e-10, "signal/volume_coverage_5/group_zero_std_frac": 1.0, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 4.141347924313532e-11, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 4.141347924313532e-11, "step": 60 }, { "calibration/aurc": 0.3976681614269202, "calibration/batch_distribution_entropy": 0.9959729508671309, "calibration/buffer_distribution_entropy": 0.9696937379789341, "calibration/confidence_entropy": 0.5022068227163163, "calibration/coverage@0%": 0.004691322162426614, "calibration/coverage@1%": 0.004691322162426614, "calibration/coverage@10%": 0.010550697162426615, "calibration/coverage@15%": 0.021878822162426613, "calibration/coverage@20%": 0.11690083781800391, "calibration/coverage@25%": 0.25801278131115457, "calibration/coverage@30%": 0.4043052837573386, "calibration/coverage@5%": 0.004691322162426614, "calibration/ece": 0.22137551214873522, "calibration/mean_confidence": 0.49639520530660997, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00107421875, "completions/max_length": 909.6, "completions/max_terminated_length": 909.6, "completions/mean_length": 129.9103515625, "completions/mean_terminated_length": 130.04986267089845, "completions/min_length": 0.0, "completions/min_terminated_length": 51.0, "epoch": 0.208, "grad_norm": 0.0016923915827646852, "learning_rate": 1e-06, "loss": -0.0002, "num_tokens": 212663080.0, "reward": 0.9061551690101624, "reward_std": 0.1243842214345932, "rewards/accuracy_reward": 0.50966796875, "rewards/brier_reward": 0.702867615222931, "rewards/confidence_uniqueness_reward": 0.9556405544281006, "rewards/format_reward": 0.9982421875, "rewards/frontier_aurc_reward": -0.003709340374916792, "rewards/frontier_ece_reward": 0.009891654085367917, "rewards/frontier_entropy_batch_reward": -0.14593522846698762, "rewards/volume_coverage_0": 2.715153962773442e-10, "rewards/volume_coverage_1": 2.715153962773442e-10, "rewards/volume_coverage_10": 4.856030966227109e-10, "rewards/volume_coverage_15": 1.0036363989884834e-09, "rewards/volume_coverage_20": 1.3117872517398599e-09, "rewards/volume_coverage_25": 1.9953037999620447e-09, "rewards/volume_coverage_5": 2.715153962773442e-10, "signal/accuracy_reward/centered_abs_mean": 0.142767333984375, "signal/accuracy_reward/group_std_mean": 0.1889306128025055, "signal/accuracy_reward/group_zero_std_frac": 0.4625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0713836669921875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0713836669921875, "signal/advantage_abs_mean": 0.09461160451173782, "signal/advantage_pre_scale_abs_mean": 0.09461160451173782, "signal/advantage_pre_scale_std": 0.14617311358451843, "signal/advantage_std": 0.14617311358451843, "signal/brier_reward/centered_abs_mean": 0.22771627008914946, "signal/brier_reward/group_std_mean": 0.2767902910709381, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.022771627083420752, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.022771627083420752, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.013186541199684144, "signal/confidence_uniqueness_reward/group_std_mean": 0.02205616645514965, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013186540920287371, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013186540920287371, "signal/format_reward/centered_abs_mean": 0.00340576171875, "signal/format_reward/group_std_mean": 0.009943688940256833, "signal/format_reward/group_zero_std_frac": 0.94375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.001702880859375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.001702880859375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0026509815361350775, "signal/frontier_aurc_reward/group_std_mean": 0.0038590433076024057, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.3137269201688466e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.3137269201688466e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.06532840430736542, "signal/frontier_ece_reward/group_std_mean": 0.08815628290176392, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006532840337604285, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006532840337604285, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.23155330419540404, "signal/frontier_entropy_batch_reward/group_std_mean": 0.31283451318740846, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02315533086657524, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02315533086657524, "signal/volume_coverage_0/centered_abs_mean": 1.2957021122694811e-09, "signal/volume_coverage_0/group_std_mean": 1.6053079698874484e-09, "signal/volume_coverage_0/group_zero_std_frac": 0.946875, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 1.2957022017812124e-10, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 1.2957022017812124e-10, "signal/volume_coverage_1/centered_abs_mean": 1.2957021122694811e-09, "signal/volume_coverage_1/group_std_mean": 1.6053079698874484e-09, "signal/volume_coverage_1/group_zero_std_frac": 0.946875, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 1.2957022017812124e-10, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 1.2957022017812124e-10, "signal/volume_coverage_10/centered_abs_mean": 2.607133697818398e-09, "signal/volume_coverage_10/group_std_mean": 3.2581726594482776e-09, "signal/volume_coverage_10/group_zero_std_frac": 0.896875, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 2.60713383173905e-10, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 2.60713383173905e-10, "signal/volume_coverage_15/centered_abs_mean": 4.622650684460438e-09, "signal/volume_coverage_15/group_std_mean": 5.792726703868212e-09, "signal/volume_coverage_15/group_zero_std_frac": 0.8, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 4.6226512097347073e-10, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 4.6226512097347073e-10, "signal/volume_coverage_20/centered_abs_mean": 7.073875726848477e-09, "signal/volume_coverage_20/group_std_mean": 8.8870939773944e-09, "signal/volume_coverage_20/group_zero_std_frac": 0.746875, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 7.073875627622295e-10, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 7.073875627622295e-10, "signal/volume_coverage_25/centered_abs_mean": 9.611973039724831e-09, "signal/volume_coverage_25/group_std_mean": 1.2051949904723358e-08, "signal/volume_coverage_25/group_zero_std_frac": 0.746875, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 9.611973272871665e-10, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 9.611973272871665e-10, "signal/volume_coverage_5/centered_abs_mean": 1.2957021122694811e-09, "signal/volume_coverage_5/group_std_mean": 1.6053079698874484e-09, "signal/volume_coverage_5/group_zero_std_frac": 0.946875, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 1.2957022017812124e-10, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 1.2957022017812124e-10, "step": 65 }, { "calibration/aurc": 0.3912217986078258, "calibration/batch_distribution_entropy": 0.9941200245807609, "calibration/buffer_distribution_entropy": 0.9744661505967608, "calibration/confidence_entropy": 0.514071657701208, "calibration/coverage@0%": 0.0062660680710640414, "calibration/coverage@1%": 0.0062660680710640414, "calibration/coverage@10%": 0.0062660680710640414, "calibration/coverage@15%": 0.019191919918652392, "calibration/coverage@20%": 0.09790743651912819, "calibration/coverage@25%": 0.1472402878103296, "calibration/coverage@30%": 0.2897601330772035, "calibration/coverage@5%": 0.0062660680710640414, "calibration/ece": 0.1981607008778328, "calibration/mean_confidence": 0.494655158586494, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0013671875, "completions/max_length": 772.2, "completions/max_terminated_length": 772.2, "completions/mean_length": 133.83466796875, "completions/mean_terminated_length": 134.01897583007812, "completions/min_length": 0.0, "completions/min_terminated_length": 53.2, "epoch": 0.224, "grad_norm": 0.0014168552588671446, "learning_rate": 1e-06, "loss": -0.0017, "num_tokens": 229186731.0, "reward": 0.8921693325042724, "reward_std": 0.11579417437314987, "rewards/accuracy_reward": 0.48359375, "rewards/brier_reward": 0.7135935187339782, "rewards/confidence_uniqueness_reward": 0.9547363996505738, "rewards/format_reward": 0.99833984375, "rewards/frontier_aurc_reward": -0.003805333934724331, "rewards/frontier_ece_reward": 0.008672526269219816, "rewards/frontier_entropy_batch_reward": -0.16450113654136658, "rewards/volume_coverage_0": 7.487820780083254e-11, "rewards/volume_coverage_1": 7.487820780083254e-11, "rewards/volume_coverage_10": 7.487820780083254e-11, "rewards/volume_coverage_15": 5.375929285067471e-10, "rewards/volume_coverage_20": 7.936750173742624e-10, "rewards/volume_coverage_25": 1.4622947658304851e-09, "rewards/volume_coverage_5": 7.487820780083254e-11, "signal/accuracy_reward/centered_abs_mean": 0.12554931640625, "signal/accuracy_reward/group_std_mean": 0.16789826452732087, "signal/accuracy_reward/group_zero_std_frac": 0.515625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.062774658203125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.062774658203125, "signal/advantage_abs_mean": 0.088116155564785, "signal/advantage_pre_scale_abs_mean": 0.088116155564785, "signal/advantage_pre_scale_std": 0.1376793324947357, "signal/advantage_std": 0.1376793324947357, "signal/brier_reward/centered_abs_mean": 0.21528012156486512, "signal/brier_reward/group_std_mean": 0.26321386098861693, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021528012305498122, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.021528012305498122, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.01388053372502327, "signal/confidence_uniqueness_reward/group_std_mean": 0.02223154343664646, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013880533864721657, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013880533864721657, "signal/format_reward/centered_abs_mean": 0.003204345703125, "signal/format_reward/group_std_mean": 0.009054953418672084, "signal/format_reward/group_zero_std_frac": 0.95, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0016021728515625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0016021728515625, "signal/frontier_aurc_reward/centered_abs_mean": 0.00263366037979722, "signal/frontier_aurc_reward/group_std_mean": 0.0038668750785291195, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.292075634817593e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.292075634817593e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.060927893966436386, "signal/frontier_ece_reward/group_std_mean": 0.08261417448520661, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006092789676040411, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006092789676040411, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2596702575683594, "signal/frontier_entropy_batch_reward/group_std_mean": 0.34350005388259885, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02596702575683594, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02596702575683594, "signal/volume_coverage_0/centered_abs_mean": 2.6789099966118e-10, "signal/volume_coverage_0/group_std_mean": 3.3500919399687135e-10, "signal/volume_coverage_0/group_zero_std_frac": 1.0, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 2.6789100625312922e-11, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 2.6789100625312922e-11, "signal/volume_coverage_1/centered_abs_mean": 2.6789099966118e-10, "signal/volume_coverage_1/group_std_mean": 3.3500919399687135e-10, "signal/volume_coverage_1/group_zero_std_frac": 1.0, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 2.6789100625312922e-11, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 2.6789100625312922e-11, "signal/volume_coverage_10/centered_abs_mean": 2.6789099966118e-10, "signal/volume_coverage_10/group_std_mean": 3.3500919399687135e-10, "signal/volume_coverage_10/group_zero_std_frac": 1.0, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 2.6789100625312922e-11, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 2.6789100625312922e-11, "signal/volume_coverage_15/centered_abs_mean": 1.3681588584280035e-09, "signal/volume_coverage_15/group_std_mean": 1.7402577712211098e-09, "signal/volume_coverage_15/group_zero_std_frac": 0.915625, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 1.3681589680625272e-10, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 1.3681589680625272e-10, "signal/volume_coverage_20/centered_abs_mean": 2.2410811706397025e-09, "signal/volume_coverage_20/group_std_mean": 2.872985727098154e-09, "signal/volume_coverage_20/group_zero_std_frac": 0.909375, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 2.241081313580917e-10, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 2.241081313580917e-10, "signal/volume_coverage_25/centered_abs_mean": 3.5609483861609446e-09, "signal/volume_coverage_25/group_std_mean": 4.546949861072846e-09, "signal/volume_coverage_25/group_zero_std_frac": 0.85, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 3.5609485707355224e-10, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 3.5609485707355224e-10, "signal/volume_coverage_5/centered_abs_mean": 2.6789099966118e-10, "signal/volume_coverage_5/group_std_mean": 3.3500919399687135e-10, "signal/volume_coverage_5/group_zero_std_frac": 1.0, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 2.6789100625312922e-11, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 2.6789100625312922e-11, "step": 70 }, { "calibration/aurc": 0.420320349024755, "calibration/batch_distribution_entropy": 0.9855579386582516, "calibration/buffer_distribution_entropy": 0.9778695258036263, "calibration/confidence_entropy": 0.5230157101208323, "calibration/coverage@0%": 0.000390625, "calibration/coverage@1%": 0.000390625, "calibration/coverage@10%": 0.000390625, "calibration/coverage@15%": 0.10157778864970646, "calibration/coverage@20%": 0.20821917808219176, "calibration/coverage@25%": 0.21291585127201565, "calibration/coverage@30%": 0.21839530332681018, "calibration/coverage@5%": 0.000390625, "calibration/ece": 0.20896871328557004, "calibration/mean_confidence": 0.505650939324179, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00068359375, "completions/max_length": 643.6, "completions/max_terminated_length": 643.6, "completions/mean_length": 139.76630859375, "completions/mean_terminated_length": 139.8614074707031, "completions/min_length": 23.2, "completions/min_terminated_length": 56.4, "epoch": 0.24, "grad_norm": 0.0017766956007108092, "learning_rate": 1e-06, "loss": -0.0009, "num_tokens": 245869618.0, "reward": 0.9217252969741822, "reward_std": 0.12044314593076706, "rewards/accuracy_reward": 0.53876953125, "rewards/brier_reward": 0.7134970784187317, "rewards/confidence_uniqueness_reward": 0.955771553516388, "rewards/format_reward": 0.99912109375, "rewards/frontier_aurc_reward": -0.003404102800413966, "rewards/frontier_ece_reward": 0.012143013067543507, "rewards/frontier_entropy_batch_reward": -0.15318614840507508, "rewards/volume_coverage_0": 3.451310973123789e-11, "rewards/volume_coverage_1": 3.451310973123789e-11, "rewards/volume_coverage_10": 3.451310973123789e-11, "rewards/volume_coverage_15": 3.451310973123789e-11, "rewards/volume_coverage_20": -3.028895327605863e-09, "rewards/volume_coverage_25": 1.9700875912576078e-08, "rewards/volume_coverage_5": 3.451310973123789e-11, "signal/accuracy_reward/centered_abs_mean": 0.140142822265625, "signal/accuracy_reward/group_std_mean": 0.18538169860839843, "signal/accuracy_reward/group_zero_std_frac": 0.465625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0700714111328125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0700714111328125, "signal/advantage_abs_mean": 0.09228953570127488, "signal/advantage_pre_scale_abs_mean": 0.09228953570127488, "signal/advantage_pre_scale_std": 0.14287880957126617, "signal/advantage_std": 0.14287880957126617, "signal/brier_reward/centered_abs_mean": 0.21166147887706757, "signal/brier_reward/group_std_mean": 0.2590271383523941, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02116614766418934, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.02116614766418934, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012101791240274907, "signal/confidence_uniqueness_reward/group_std_mean": 0.017954951152205467, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001210179179906845, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001210179179906845, "signal/format_reward/centered_abs_mean": 0.001702880859375, "signal/format_reward/group_std_mean": 0.004971844423562288, "signal/format_reward/group_zero_std_frac": 0.971875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0008514404296875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0008514404296875, "signal/frontier_aurc_reward/centered_abs_mean": 0.0026966645382344723, "signal/frontier_aurc_reward/group_std_mean": 0.00390788302756846, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.3708307819324544e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.3708307819324544e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.05930803343653679, "signal/frontier_ece_reward/group_std_mean": 0.08072617352008819, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0059308033436536785, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0059308033436536785, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.23889875411987305, "signal/frontier_entropy_batch_reward/group_std_mean": 0.317973917722702, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.023889876157045364, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.023889876157045364, "signal/volume_coverage_0/centered_abs_mean": 2.0800886846306099e-10, "signal/volume_coverage_0/group_std_mean": 2.6474508141305366e-10, "signal/volume_coverage_0/group_zero_std_frac": 1.0, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 2.0800886707528222e-11, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 2.0800886707528222e-11, "signal/volume_coverage_1/centered_abs_mean": 2.0800886846306099e-10, "signal/volume_coverage_1/group_std_mean": 2.6474508141305366e-10, "signal/volume_coverage_1/group_zero_std_frac": 1.0, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 2.0800886707528222e-11, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 2.0800886707528222e-11, "signal/volume_coverage_10/centered_abs_mean": 2.0800886846306099e-10, "signal/volume_coverage_10/group_std_mean": 2.6474508141305366e-10, "signal/volume_coverage_10/group_zero_std_frac": 1.0, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 2.0800886707528222e-11, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 2.0800886707528222e-11, "signal/volume_coverage_15/centered_abs_mean": 2.0800886846306099e-10, "signal/volume_coverage_15/group_std_mean": 2.6474508141305366e-10, "signal/volume_coverage_15/group_zero_std_frac": 1.0, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 2.0800886707528222e-11, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 2.0800886707528222e-11, "signal/volume_coverage_20/centered_abs_mean": 6.613854078296732e-09, "signal/volume_coverage_20/group_std_mean": 8.326563194493985e-09, "signal/volume_coverage_20/group_zero_std_frac": 0.88125, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 6.613853915926615e-10, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 6.613853915926615e-10, "signal/volume_coverage_25/centered_abs_mean": 5.696032007451723e-08, "signal/volume_coverage_25/group_std_mean": 7.250977392958547e-08, "signal/volume_coverage_25/group_zero_std_frac": 0.83125, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 5.696031910029653e-09, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 5.696031910029653e-09, "signal/volume_coverage_5/centered_abs_mean": 2.0800886846306099e-10, "signal/volume_coverage_5/group_std_mean": 2.6474508141305366e-10, "signal/volume_coverage_5/group_zero_std_frac": 1.0, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 2.0800886707528222e-11, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 2.0800886707528222e-11, "step": 75 }, { "calibration/aurc": 0.3544769332207135, "calibration/batch_distribution_entropy": 0.9897546231661192, "calibration/buffer_distribution_entropy": 0.9801283074949945, "calibration/confidence_entropy": 0.5082607589543467, "calibration/coverage@0%": 0.002735139432485323, "calibration/coverage@1%": 0.002735139432485323, "calibration/coverage@10%": 0.025782014432485324, "calibration/coverage@15%": 0.03203201443248532, "calibration/coverage@20%": 0.11328201443248531, "calibration/coverage@25%": 0.33795942392367906, "calibration/coverage@30%": 0.47439227617416824, "calibration/coverage@5%": 0.010547639432485323, "calibration/ece": 0.176115213516259, "calibration/mean_confidence": 0.5356183375328162, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00087890625, "completions/max_length": 524.0, "completions/max_terminated_length": 524.0, "completions/mean_length": 144.349609375, "completions/mean_terminated_length": 144.47740173339844, "completions/min_length": 11.6, "completions/min_terminated_length": 57.4, "epoch": 0.256, "grad_norm": 0.001462550018914044, "learning_rate": 1e-06, "loss": -0.0003, "num_tokens": 262402574.0, "reward": 0.9123332858085632, "reward_std": 0.11308208853006363, "rewards/accuracy_reward": 0.51884765625, "rewards/brier_reward": 0.7276524066925049, "rewards/confidence_uniqueness_reward": 0.9561123728752137, "rewards/format_reward": 0.99912109375, "rewards/frontier_aurc_reward": -0.003458845429122448, "rewards/frontier_ece_reward": 0.015241177752614021, "rewards/frontier_entropy_batch_reward": -0.16508454382419585, "rewards/volume_coverage_0": -3.699867013240085e-11, "rewards/volume_coverage_1": -3.699867013240085e-11, "rewards/volume_coverage_10": -3.699867013240085e-11, "rewards/volume_coverage_15": -8.109867156080516e-11, "rewards/volume_coverage_20": -7.289573838217128e-12, "rewards/volume_coverage_25": 3.8061882085774904e-11, "rewards/volume_coverage_5": -3.699867013240085e-11, "signal/accuracy_reward/centered_abs_mean": 0.124127197265625, "signal/accuracy_reward/group_std_mean": 0.1660928785800934, "signal/accuracy_reward/group_zero_std_frac": 0.5125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0620635986328125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0620635986328125, "signal/advantage_abs_mean": 0.08623839318752288, "signal/advantage_pre_scale_abs_mean": 0.08623839318752288, "signal/advantage_pre_scale_std": 0.1374477416276932, "signal/advantage_std": 0.1374477416276932, "signal/brier_reward/centered_abs_mean": 0.20527395308017732, "signal/brier_reward/group_std_mean": 0.2524005711078644, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020527396351099014, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.020527396351099014, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012610967457294463, "signal/confidence_uniqueness_reward/group_std_mean": 0.01848965808749199, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012610967503860592, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012610967503860592, "signal/format_reward/centered_abs_mean": 0.001702880859375, "signal/format_reward/group_std_mean": 0.004971844423562288, "signal/format_reward/group_zero_std_frac": 0.971875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0008514404296875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0008514404296875, "signal/frontier_aurc_reward/centered_abs_mean": 0.0028766633477061987, "signal/frontier_aurc_reward/group_std_mean": 0.004133455315604806, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.595829330151901e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.595829330151901e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.05877775847911835, "signal/frontier_ece_reward/group_std_mean": 0.07837124764919282, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0058777758851647375, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0058777758851647375, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2506664037704468, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3321652948856354, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.025066639855504037, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.025066639855504037, "signal/volume_coverage_0/centered_abs_mean": 7.425148794426573e-10, "signal/volume_coverage_0/group_std_mean": 9.40662159276684e-10, "signal/volume_coverage_0/group_zero_std_frac": 0.9625, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 7.425148976572537e-11, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 7.425148976572537e-11, "signal/volume_coverage_1/centered_abs_mean": 7.425148794426573e-10, "signal/volume_coverage_1/group_std_mean": 9.40662159276684e-10, "signal/volume_coverage_1/group_zero_std_frac": 0.9625, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 7.425148976572537e-11, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 7.425148976572537e-11, "signal/volume_coverage_10/centered_abs_mean": 7.425148794426573e-10, "signal/volume_coverage_10/group_std_mean": 9.40662159276684e-10, "signal/volume_coverage_10/group_zero_std_frac": 0.9625, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 7.425148976572537e-11, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 7.425148976572537e-11, "signal/volume_coverage_15/centered_abs_mean": 8.75433755348487e-10, "signal/volume_coverage_15/group_std_mean": 1.108826042095501e-09, "signal/volume_coverage_15/group_zero_std_frac": 0.953125, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 8.75433795767544e-11, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 8.75433795767544e-11, "signal/volume_coverage_20/centered_abs_mean": 1.2596845698487068e-09, "signal/volume_coverage_20/group_std_mean": 1.593373660790931e-09, "signal/volume_coverage_20/group_zero_std_frac": 0.953125, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 1.2596845936144184e-10, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 1.2596845936144184e-10, "signal/volume_coverage_25/centered_abs_mean": 1.3196504354551219e-09, "signal/volume_coverage_25/group_std_mean": 1.6703829597286557e-09, "signal/volume_coverage_25/group_zero_std_frac": 0.953125, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 1.3196504661597274e-10, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 1.3196504661597274e-10, "signal/volume_coverage_5/centered_abs_mean": 7.425148794426573e-10, "signal/volume_coverage_5/group_std_mean": 9.40662159276684e-10, "signal/volume_coverage_5/group_zero_std_frac": 0.9625, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 7.425148976572537e-11, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 7.425148976572537e-11, "step": 80 }, { "calibration/aurc": 0.43225073928840985, "calibration/batch_distribution_entropy": 0.9909829492064789, "calibration/buffer_distribution_entropy": 0.9819880995143937, "calibration/confidence_entropy": 0.5063951814225, "calibration/coverage@0%": 0.001953125, "calibration/coverage@1%": 0.001953125, "calibration/coverage@10%": 0.005078125, "calibration/coverage@15%": 0.039453125, "calibration/coverage@20%": 0.063671875, "calibration/coverage@25%": 0.133984375, "calibration/coverage@30%": 0.26191023284313725, "calibration/coverage@5%": 0.001953125, "calibration/ece": 0.17791232659343512, "calibration/mean_confidence": 0.5035804291342054, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00048828125, "completions/max_length": 533.0, "completions/max_terminated_length": 533.0, "completions/mean_length": 152.45693359375, "completions/mean_terminated_length": 152.53079528808593, "completions/min_length": 22.8, "completions/min_terminated_length": 59.4, "epoch": 0.272, "grad_norm": 0.0013894687872380018, "learning_rate": 1e-06, "loss": -0.0, "num_tokens": 278929429.0, "reward": 0.9068776488304138, "reward_std": 0.11359266936779022, "rewards/accuracy_reward": 0.505859375, "rewards/brier_reward": 0.7291383624076844, "rewards/confidence_uniqueness_reward": 0.9561533212661744, "rewards/format_reward": 0.9994140625, "rewards/frontier_aurc_reward": -0.0033955852035433055, "rewards/frontier_ece_reward": 0.014794471859931945, "rewards/frontier_entropy_batch_reward": -0.15725232064723968, "rewards/volume_coverage_0": 2.8186108828875955e-11, "rewards/volume_coverage_1": 2.8186108828875955e-11, "rewards/volume_coverage_10": 1.9451884721022327e-11, "rewards/volume_coverage_15": -8.745132416831946e-10, "rewards/volume_coverage_20": -2.1576839356884924e-09, "rewards/volume_coverage_25": -2.4406589527181666e-09, "rewards/volume_coverage_5": 2.8186108828875955e-11, "signal/accuracy_reward/centered_abs_mean": 0.1293701171875, "signal/accuracy_reward/group_std_mean": 0.17103227078914643, "signal/accuracy_reward/group_zero_std_frac": 0.51875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06468505859375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06468505859375, "signal/advantage_abs_mean": 0.08693162500858306, "signal/advantage_pre_scale_abs_mean": 0.08693162500858306, "signal/advantage_pre_scale_std": 0.13726737201213837, "signal/advantage_std": 0.13726737201213837, "signal/brier_reward/centered_abs_mean": 0.20380387604236602, "signal/brier_reward/group_std_mean": 0.2516744613647461, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02038038745522499, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.02038038745522499, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012038312293589115, "signal/confidence_uniqueness_reward/group_std_mean": 0.016938690468668936, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001203831285238266, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001203831285238266, "signal/format_reward/centered_abs_mean": 0.00113525390625, "signal/format_reward/group_std_mean": 0.0033145629800856113, "signal/format_reward/group_zero_std_frac": 0.98125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000567626953125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000567626953125, "signal/frontier_aurc_reward/centered_abs_mean": 0.002711014449596405, "signal/frontier_aurc_reward/group_std_mean": 0.003989389818161726, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.388768163858913e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.388768163858913e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.054540529102087024, "signal/frontier_ece_reward/group_std_mean": 0.0740948662161827, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005454053077846766, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005454053077846766, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.24244910776615142, "signal/frontier_entropy_batch_reward/group_std_mean": 0.32230539321899415, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.024244911223649978, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.024244911223649978, "signal/volume_coverage_0/centered_abs_mean": 5.712658679435378e-10, "signal/volume_coverage_0/group_std_mean": 7.124818721115922e-10, "signal/volume_coverage_0/group_zero_std_frac": 0.971875, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 5.712658662088144e-11, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 5.712658662088144e-11, "signal/volume_coverage_1/centered_abs_mean": 5.712658679435378e-10, "signal/volume_coverage_1/group_std_mean": 7.124818721115922e-10, "signal/volume_coverage_1/group_zero_std_frac": 0.971875, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 5.712658662088144e-11, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 5.712658662088144e-11, "signal/volume_coverage_10/centered_abs_mean": 1.208340172365041e-09, "signal/volume_coverage_10/group_std_mean": 1.5031356812023056e-09, "signal/volume_coverage_10/group_zero_std_frac": 0.95, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 1.2083401761814328e-10, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 1.2083401761814328e-10, "signal/volume_coverage_15/centered_abs_mean": 5.3549160383692396e-09, "signal/volume_coverage_15/group_std_mean": 6.828456811103223e-09, "signal/volume_coverage_15/group_zero_std_frac": 0.859375, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 5.354915864896892e-10, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 5.354915864896892e-10, "signal/volume_coverage_20/centered_abs_mean": 1.0593179478046721e-08, "signal/volume_coverage_20/group_std_mean": 1.35569361847665e-08, "signal/volume_coverage_20/group_zero_std_frac": 0.853125, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 1.0593180221896148e-09, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 1.0593180221896148e-09, "signal/volume_coverage_25/centered_abs_mean": 1.180654151111682e-08, "signal/volume_coverage_25/group_std_mean": 1.5109510609345732e-08, "signal/volume_coverage_25/group_zero_std_frac": 0.853125, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 1.1806541300174445e-09, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 1.1806541300174445e-09, "signal/volume_coverage_5/centered_abs_mean": 5.712658679435378e-10, "signal/volume_coverage_5/group_std_mean": 7.124818721115922e-10, "signal/volume_coverage_5/group_zero_std_frac": 0.971875, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 5.712658662088144e-11, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 5.712658662088144e-11, "step": 85 }, { "calibration/aurc": 0.4088050668355857, "calibration/batch_distribution_entropy": 0.9935028330405942, "calibration/buffer_distribution_entropy": 0.9839995191322982, "calibration/confidence_entropy": 0.5062243662924341, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.004296875, "calibration/coverage@25%": 0.10625, "calibration/coverage@30%": 0.15546875, "calibration/coverage@5%": 0.0, "calibration/ece": 0.17225932680606057, "calibration/mean_confidence": 0.5062148171178394, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00048828125, "completions/max_length": 854.4, "completions/max_terminated_length": 854.4, "completions/mean_length": 154.471484375, "completions/mean_terminated_length": 154.5472412109375, "completions/min_length": 23.4, "completions/min_terminated_length": 60.0, "epoch": 0.288, "grad_norm": 0.001574499299749732, "learning_rate": 1e-06, "loss": 0.0001, "num_tokens": 295469393.0, "reward": 0.9067415595054626, "reward_std": 0.1190925344824791, "rewards/accuracy_reward": 0.50576171875, "rewards/brier_reward": 0.7334963202476501, "rewards/confidence_uniqueness_reward": 0.9556491851806641, "rewards/format_reward": 0.99912109375, "rewards/frontier_aurc_reward": -0.003264536615461111, "rewards/frontier_ece_reward": 0.015714940335601568, "rewards/frontier_entropy_batch_reward": -0.16145087778568268, "rewards/volume_coverage_0": 2.3124501792592954e-11, "rewards/volume_coverage_1": 2.3124501792592954e-11, "rewards/volume_coverage_10": 8.063035152744425e-11, "rewards/volume_coverage_15": 5.572204008971737e-10, "rewards/volume_coverage_20": 1.259253810080574e-09, "rewards/volume_coverage_25": 8.726174105522234e-09, "rewards/volume_coverage_5": 2.3124501792592954e-11, "signal/accuracy_reward/centered_abs_mean": 0.140118408203125, "signal/accuracy_reward/group_std_mean": 0.18722763955593108, "signal/accuracy_reward/group_zero_std_frac": 0.4625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0700592041015625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0700592041015625, "signal/advantage_abs_mean": 0.09046539664268494, "signal/advantage_pre_scale_abs_mean": 0.09046539664268494, "signal/advantage_pre_scale_std": 0.14088937640190125, "signal/advantage_std": 0.14088937640190125, "signal/brier_reward/centered_abs_mean": 0.20394990146160125, "signal/brier_reward/group_std_mean": 0.2513652205467224, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02039499022066593, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.02039499022066593, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012927304022014141, "signal/confidence_uniqueness_reward/group_std_mean": 0.01885262057185173, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001292730402201414, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001292730402201414, "signal/format_reward/centered_abs_mean": 0.001702880859375, "signal/format_reward/group_std_mean": 0.0049718443769961596, "signal/format_reward/group_zero_std_frac": 0.971875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0008514404296875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0008514404296875, "signal/frontier_aurc_reward/centered_abs_mean": 0.0026389469858258963, "signal/frontier_aurc_reward/group_std_mean": 0.003926029847934842, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.2986837322823706e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.2986837322823706e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.05253084748983383, "signal/frontier_ece_reward/group_std_mean": 0.07113655209541321, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005253084935247898, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005253084935247898, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2522804230451584, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3346827507019043, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.025228042155504227, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.025228042155504227, "signal/volume_coverage_0/centered_abs_mean": 2.599262319269435e-10, "signal/volume_coverage_0/group_std_mean": 3.3370869817472835e-10, "signal/volume_coverage_0/group_zero_std_frac": 1.0, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 2.5992624198833968e-11, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 2.5992624198833968e-11, "signal/volume_coverage_1/centered_abs_mean": 2.599262319269435e-10, "signal/volume_coverage_1/group_std_mean": 3.3370869817472835e-10, "signal/volume_coverage_1/group_zero_std_frac": 1.0, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 2.5992624198833968e-11, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 2.5992624198833968e-11, "signal/volume_coverage_10/centered_abs_mean": 7.917605640561653e-10, "signal/volume_coverage_10/group_std_mean": 1.0258700486320826e-09, "signal/volume_coverage_10/group_zero_std_frac": 0.959375, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 7.917605408108708e-11, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 7.917605408108708e-11, "signal/volume_coverage_15/centered_abs_mean": 1.934186673324678e-09, "signal/volume_coverage_15/group_std_mean": 2.4541597212768274e-09, "signal/volume_coverage_15/group_zero_std_frac": 0.871875, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 1.934186696223028e-10, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 1.934186696223028e-10, "signal/volume_coverage_20/centered_abs_mean": 3.4954434416367697e-09, "signal/volume_coverage_20/group_std_mean": 4.4067209895026774e-09, "signal/volume_coverage_20/group_zero_std_frac": 0.85625, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 3.495443653273034e-10, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 3.495443653273034e-10, "signal/volume_coverage_25/centered_abs_mean": 1.7893334902652215e-08, "signal/volume_coverage_25/group_std_mean": 2.2423871748333824e-08, "signal/volume_coverage_25/group_zero_std_frac": 0.753125, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 1.7893335507723761e-09, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 1.7893335507723761e-09, "signal/volume_coverage_5/centered_abs_mean": 2.599262319269435e-10, "signal/volume_coverage_5/group_std_mean": 3.3370869817472835e-10, "signal/volume_coverage_5/group_zero_std_frac": 1.0, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 2.5992624198833968e-11, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 2.5992624198833968e-11, "step": 90 }, { "calibration/aurc": 0.3437908959750037, "calibration/batch_distribution_entropy": 0.9911090733037147, "calibration/buffer_distribution_entropy": 0.9855293379549626, "calibration/confidence_entropy": 0.5054607477470774, "calibration/coverage@0%": 0.002734375, "calibration/coverage@1%": 0.002734375, "calibration/coverage@10%": 0.026953125, "calibration/coverage@15%": 0.07305759803921569, "calibration/coverage@20%": 0.16379136029411764, "calibration/coverage@25%": 0.23336856617647062, "calibration/coverage@30%": 0.3679718137254902, "calibration/coverage@5%": 0.002734375, "calibration/ece": 0.1264503527833825, "calibration/mean_confidence": 0.5267013760163815, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0005859375, "completions/max_length": 521.4, "completions/max_terminated_length": 521.4, "completions/mean_length": 157.44130859375, "completions/mean_terminated_length": 157.533837890625, "completions/min_length": 0.0, "completions/min_terminated_length": 55.8, "epoch": 0.304, "grad_norm": 0.0015454553067684174, "learning_rate": 1e-06, "loss": -0.0009, "num_tokens": 312011544.0, "reward": 0.9047148585319519, "reward_std": 0.11162041127681732, "rewards/accuracy_reward": 0.4994140625, "rewards/brier_reward": 0.7336631774902344, "rewards/confidence_uniqueness_reward": 0.9567294597625733, "rewards/format_reward": 0.99931640625, "rewards/frontier_aurc_reward": -0.003355294652283192, "rewards/frontier_ece_reward": 0.014690110087394714, "rewards/frontier_entropy_batch_reward": -0.15116712749004363, "rewards/volume_coverage_0": 7.067736534915881e-11, "rewards/volume_coverage_1": 7.067736534915881e-11, "rewards/volume_coverage_10": 7.067736534915881e-11, "rewards/volume_coverage_15": 2.0040456655268102e-10, "rewards/volume_coverage_20": 2.8201859181287147e-10, "rewards/volume_coverage_25": 3.96269733693444e-10, "rewards/volume_coverage_5": 7.067736534915881e-11, "signal/accuracy_reward/centered_abs_mean": 0.13067626953125, "signal/accuracy_reward/group_std_mean": 0.16918764114379883, "signal/accuracy_reward/group_zero_std_frac": 0.521875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.065338134765625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.065338134765625, "signal/advantage_abs_mean": 0.08662759363651276, "signal/advantage_pre_scale_abs_mean": 0.08662759363651276, "signal/advantage_pre_scale_std": 0.13579559773206712, "signal/advantage_std": 0.13579559773206712, "signal/brier_reward/centered_abs_mean": 0.19534237086772918, "signal/brier_reward/group_std_mean": 0.24365437030792236, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019534237310290338, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.019534237310290338, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.011561508290469646, "signal/confidence_uniqueness_reward/group_std_mean": 0.01655147448182106, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0011561508290469646, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0011561508290469646, "signal/format_reward/centered_abs_mean": 0.001324462890625, "signal/format_reward/group_std_mean": 0.003866990143433213, "signal/format_reward/group_zero_std_frac": 0.978125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006622314453125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0006622314453125, "signal/frontier_aurc_reward/centered_abs_mean": 0.002693092543631792, "signal/frontier_aurc_reward/group_std_mean": 0.004001007089391351, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.366365854162723e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.366365854162723e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.0502876341342926, "signal/frontier_ece_reward/group_std_mean": 0.06829719394445419, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005028763134032488, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005028763134032488, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2349224418401718, "signal/frontier_entropy_batch_reward/group_std_mean": 0.31641311645507814, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.023492245376110076, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.023492245376110076, "signal/volume_coverage_0/centered_abs_mean": 1.7005946095771662e-10, "signal/volume_coverage_0/group_std_mean": 2.1498098995431648e-10, "signal/volume_coverage_0/group_zero_std_frac": 1.0, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 1.7005946650883176e-11, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 1.7005946650883176e-11, "signal/volume_coverage_1/centered_abs_mean": 1.7005946095771662e-10, "signal/volume_coverage_1/group_std_mean": 2.1498098995431648e-10, "signal/volume_coverage_1/group_zero_std_frac": 1.0, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 1.7005946650883176e-11, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 1.7005946650883176e-11, "signal/volume_coverage_10/centered_abs_mean": 1.7005946095771662e-10, "signal/volume_coverage_10/group_std_mean": 2.1498098995431648e-10, "signal/volume_coverage_10/group_zero_std_frac": 1.0, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 1.7005946650883176e-11, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 1.7005946650883176e-11, "signal/volume_coverage_15/centered_abs_mean": 4.253433771861381e-10, "signal/volume_coverage_15/group_std_mean": 5.489494206423196e-10, "signal/volume_coverage_15/group_zero_std_frac": 0.99375, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 4.253433855128108e-11, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 4.253433855128108e-11, "signal/volume_coverage_20/centered_abs_mean": 1.1804189611885895e-09, "signal/volume_coverage_20/group_std_mean": 1.4893079200439807e-09, "signal/volume_coverage_20/group_zero_std_frac": 0.953125, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 1.1804189448821889e-10, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 1.1804189448821889e-10, "signal/volume_coverage_25/centered_abs_mean": 1.6809882472101912e-09, "signal/volume_coverage_25/group_std_mean": 2.120693264051621e-09, "signal/volume_coverage_25/group_zero_std_frac": 0.9375, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 1.680988253108251e-10, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 1.680988253108251e-10, "signal/volume_coverage_5/centered_abs_mean": 1.7005946095771662e-10, "signal/volume_coverage_5/group_std_mean": 2.1498098995431648e-10, "signal/volume_coverage_5/group_zero_std_frac": 1.0, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 1.7005946650883176e-11, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 1.7005946650883176e-11, "step": 95 }, { "calibration/aurc": 0.2968136628582203, "calibration/batch_distribution_entropy": 0.9913739925787933, "calibration/buffer_distribution_entropy": 0.9865418876336769, "calibration/confidence_entropy": 0.5013708794629002, "calibration/coverage@0%": 0.009375, "calibration/coverage@1%": 0.009375, "calibration/coverage@10%": 0.078125, "calibration/coverage@15%": 0.24296875, "calibration/coverage@20%": 0.37578125, "calibration/coverage@25%": 0.4625, "calibration/coverage@30%": 0.5640625, "calibration/coverage@5%": 0.038671875, "calibration/ece": 0.14873248383914847, "calibration/mean_confidence": 0.5345915049288531, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0005859375, "completions/max_length": 601.2, "completions/max_terminated_length": 601.2, "completions/mean_length": 159.4876953125, "completions/mean_terminated_length": 159.58204956054686, "completions/min_length": 35.4, "completions/min_terminated_length": 60.6, "epoch": 0.32, "grad_norm": 0.0011671575484797359, "learning_rate": 1e-06, "loss": -0.0004, "num_tokens": 328733402.0, "reward": 0.918266487121582, "reward_std": 0.09741021245718003, "rewards/accuracy_reward": 0.5234375, "rewards/brier_reward": 0.7532869338989258, "rewards/confidence_uniqueness_reward": 0.9563530325889588, "rewards/format_reward": 0.99931640625, "rewards/frontier_aurc_reward": -0.003001447068527341, "rewards/frontier_ece_reward": 0.01955104824155569, "rewards/frontier_entropy_batch_reward": -0.15992054343223572, "rewards/volume_coverage_0": 5.714750343950581e-11, "rewards/volume_coverage_1": 5.714750343950581e-11, "rewards/volume_coverage_10": -2.0253623704384605e-10, "rewards/volume_coverage_15": 2.330152578267769e-10, "rewards/volume_coverage_20": 3.5749040988748704e-10, "rewards/volume_coverage_25": 1.683893230105052e-09, "rewards/volume_coverage_5": 5.714750343950581e-11, "signal/accuracy_reward/centered_abs_mean": 0.09888916015625, "signal/accuracy_reward/group_std_mean": 0.13817883729934693, "signal/accuracy_reward/group_zero_std_frac": 0.578125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.049444580078125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.049444580078125, "signal/advantage_abs_mean": 0.07216062396764755, "signal/advantage_pre_scale_abs_mean": 0.07216062396764755, "signal/advantage_pre_scale_std": 0.12035643607378006, "signal/advantage_std": 0.12035643607378006, "signal/brier_reward/centered_abs_mean": 0.1801830530166626, "signal/brier_reward/group_std_mean": 0.22755076885223388, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01801830604672432, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01801830604672432, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012103627994656562, "signal/confidence_uniqueness_reward/group_std_mean": 0.01730086486786604, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012103628600016237, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012103628600016237, "signal/format_reward/centered_abs_mean": 0.001324462890625, "signal/format_reward/group_std_mean": 0.003866990143433213, "signal/format_reward/group_zero_std_frac": 0.978125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006622314453125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0006622314453125, "signal/frontier_aurc_reward/centered_abs_mean": 0.002509988471865654, "signal/frontier_aurc_reward/group_std_mean": 0.0037281450815498827, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.137485618935898e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.137485618935898e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.04895166382193565, "signal/frontier_ece_reward/group_std_mean": 0.06564311608672142, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004895166680216789, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004895166680216789, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.24786655604839325, "signal/frontier_entropy_batch_reward/group_std_mean": 0.32686212062835696, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.024786657094955443, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.024786657094955443, "signal/volume_coverage_0/centered_abs_mean": 1.6123284408975368e-10, "signal/volume_coverage_0/group_std_mean": 2.0597639774355692e-10, "signal/volume_coverage_0/group_zero_std_frac": 1.0, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 1.612328473857283e-11, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 1.612328473857283e-11, "signal/volume_coverage_1/centered_abs_mean": 1.6123284408975368e-10, "signal/volume_coverage_1/group_std_mean": 2.0597639774355692e-10, "signal/volume_coverage_1/group_zero_std_frac": 1.0, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 1.612328473857283e-11, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 1.612328473857283e-11, "signal/volume_coverage_10/centered_abs_mean": 8.140090455854843e-10, "signal/volume_coverage_10/group_std_mean": 1.0202650856105943e-09, "signal/volume_coverage_10/group_zero_std_frac": 0.953125, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 8.140090494018759e-11, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 8.140090494018759e-11, "signal/volume_coverage_15/centered_abs_mean": 1.4006979430492006e-09, "signal/volume_coverage_15/group_std_mean": 1.7565225794713424e-09, "signal/volume_coverage_15/group_zero_std_frac": 0.93125, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 1.4006979239672425e-10, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 1.4006979239672425e-10, "signal/volume_coverage_20/centered_abs_mean": 2.6041023913625503e-09, "signal/volume_coverage_20/group_std_mean": 3.265086358922309e-09, "signal/volume_coverage_20/group_zero_std_frac": 0.909375, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 2.604102341749459e-10, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 2.604102341749459e-10, "signal/volume_coverage_25/centered_abs_mean": 7.579259633100355e-09, "signal/volume_coverage_25/group_std_mean": 9.551655268807968e-09, "signal/volume_coverage_25/group_zero_std_frac": 0.76875, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 7.579259378096004e-10, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 7.579259378096004e-10, "signal/volume_coverage_5/centered_abs_mean": 1.6123284408975368e-10, "signal/volume_coverage_5/group_std_mean": 2.0597639774355692e-10, "signal/volume_coverage_5/group_zero_std_frac": 1.0, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 1.612328473857283e-11, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 1.612328473857283e-11, "step": 100 }, { "epoch": 0.32, "eval_calibration/aurc": 0.5300616902547085, "eval_calibration/batch_distribution_entropy": 0.9290254778537306, "eval_calibration/buffer_distribution_entropy": 0.987155783499908, "eval_calibration/confidence_entropy": 0.4952664114246258, "eval_calibration/coverage@0%": 0.0390625, "eval_calibration/coverage@1%": 0.0390625, "eval_calibration/coverage@10%": 0.0390625, "eval_calibration/coverage@15%": 0.0390625, "eval_calibration/coverage@20%": 0.09375, "eval_calibration/coverage@25%": 0.1484375, "eval_calibration/coverage@30%": 0.1796875, "eval_calibration/coverage@5%": 0.0390625, "eval_calibration/ece": 0.22405392000125574, "eval_calibration/mean_confidence": 0.45794091723888564, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 363.5, "eval_completions/max_terminated_length": 363.5, "eval_completions/mean_length": 168.7442741394043, "eval_completions/mean_terminated_length": 168.7442741394043, "eval_completions/min_length": 82.5, "eval_completions/min_terminated_length": 82.5, "eval_loss": 0.0, "eval_num_tokens": 328733402.0, "eval_reward": 0.7702741324901581, "eval_reward_std": 0.2489350475370884, "eval_rewards/accuracy_reward": 0.40625, "eval_rewards/brier_reward": 0.75297711789608, "eval_rewards/confidence_uniqueness_reward": 0.90283203125, "eval_rewards/format_reward": 1.0, "eval_rewards/frontier_aurc_reward": -0.0036825715797021985, "eval_rewards/frontier_ece_reward": 0.016142661683261395, "eval_rewards/frontier_entropy_batch_reward": -1.0, "eval_rewards/volume_coverage_0": 7.314555679086097e-11, "eval_rewards/volume_coverage_1": 7.314555679086097e-11, "eval_rewards/volume_coverage_10": 7.314555679086097e-11, "eval_rewards/volume_coverage_15": 1.8925829212325255e-09, "eval_rewards/volume_coverage_20": 2.596937434193314e-09, "eval_rewards/volume_coverage_25": 7.488784627140976e-09, "eval_rewards/volume_coverage_5": 7.314555679086097e-11, "eval_runtime": 19.1537, "eval_samples_per_second": 26.105, "eval_signal/accuracy_reward/centered_abs_mean": 0.46337890625, "eval_signal/accuracy_reward/group_std_mean": 0.4884117320179939, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.231689453125, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.231689453125, "eval_signal/advantage_abs_mean": 0.23422155156731606, "eval_signal/advantage_pre_scale_abs_mean": 0.23422155156731606, "eval_signal/advantage_pre_scale_std": 0.2462342418730259, "eval_signal/advantage_std": 0.2462342418730259, "eval_signal/brier_reward/centered_abs_mean": 0.21260768920183182, "eval_signal/brier_reward/group_std_mean": 0.26391947641968727, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02126076864078641, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.02126076864078641, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0369873046875, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.04263218864798546, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0036987304920330644, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0036987304920330644, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.003667147539090365, "eval_signal/frontier_aurc_reward/group_std_mean": 0.005491463467478752, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.5839345148124266e-05, "eval_signal/frontier_aurc_reward/weight": 0.012500000186264515, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.5839345148124266e-05, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.04754521977156401, "eval_signal/frontier_ece_reward/group_std_mean": 0.06594326347112656, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004754522116854787, "eval_signal/frontier_ece_reward/weight": 0.10000000149011612, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004754522116854787, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0, "eval_signal/volume_coverage_0/centered_abs_mean": 1.6283716996329245e-10, "eval_signal/volume_coverage_0/group_std_mean": 2.073966241800118e-10, "eval_signal/volume_coverage_0/group_zero_std_frac": 1.0, "eval_signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 1.6283716996329245e-11, "eval_signal/volume_coverage_0/weight": 0.10000000149011612, "eval_signal/volume_coverage_0/weighted_centered_abs_mean": 1.6283716996329245e-11, "eval_signal/volume_coverage_1/centered_abs_mean": 1.6283716996329245e-10, "eval_signal/volume_coverage_1/group_std_mean": 2.073966241800118e-10, "eval_signal/volume_coverage_1/group_zero_std_frac": 1.0, "eval_signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 1.6283716996329245e-11, "eval_signal/volume_coverage_1/weight": 0.10000000149011612, "eval_signal/volume_coverage_1/weighted_centered_abs_mean": 1.6283716996329245e-11, "eval_signal/volume_coverage_10/centered_abs_mean": 1.6283716996329245e-10, "eval_signal/volume_coverage_10/group_std_mean": 2.073966241800118e-10, "eval_signal/volume_coverage_10/group_zero_std_frac": 1.0, "eval_signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 1.6283716996329245e-11, "eval_signal/volume_coverage_10/weight": 0.10000000149011612, "eval_signal/volume_coverage_10/weighted_centered_abs_mean": 1.6283716996329245e-11, "eval_signal/volume_coverage_15/centered_abs_mean": 3.6766215688252557e-09, "eval_signal/volume_coverage_15/group_std_mean": 4.504548767846062e-09, "eval_signal/volume_coverage_15/group_zero_std_frac": 0.75, "eval_signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 3.6766217349250285e-10, "eval_signal/volume_coverage_15/weight": 0.10000000149011612, "eval_signal/volume_coverage_15/weighted_centered_abs_mean": 3.6766217349250285e-10, "eval_signal/volume_coverage_20/centered_abs_mean": 7.212183072979883e-09, "eval_signal/volume_coverage_20/group_std_mean": 9.087623431547343e-09, "eval_signal/volume_coverage_20/group_zero_std_frac": 0.75, "eval_signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 7.212182968896474e-10, "eval_signal/volume_coverage_20/weight": 0.10000000149011612, "eval_signal/volume_coverage_20/weighted_centered_abs_mean": 7.212182968896474e-10, "eval_signal/volume_coverage_25/centered_abs_mean": 2.1109896330973044e-08, "eval_signal/volume_coverage_25/group_std_mean": 2.6170708489203776e-08, "eval_signal/volume_coverage_25/group_zero_std_frac": 0.6875, "eval_signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 2.1109897024862434e-09, "eval_signal/volume_coverage_25/weight": 0.10000000149011612, "eval_signal/volume_coverage_25/weighted_centered_abs_mean": 2.1109897024862434e-09, "eval_signal/volume_coverage_5/centered_abs_mean": 1.6283716996329245e-10, "eval_signal/volume_coverage_5/group_std_mean": 2.073966241800118e-10, "eval_signal/volume_coverage_5/group_zero_std_frac": 1.0, "eval_signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 1.6283716996329245e-11, "eval_signal/volume_coverage_5/weight": 0.10000000149011612, "eval_signal/volume_coverage_5/weighted_centered_abs_mean": 1.6283716996329245e-11, "eval_steps_per_second": 0.209, "step": 100 }, { "calibration/aurc": 0.3244640631384934, "calibration/batch_distribution_entropy": 0.9834537141058156, "calibration/buffer_distribution_entropy": 0.9890071155622993, "calibration/confidence_entropy": 0.5025044401941181, "calibration/coverage@0%": 0.008994312622309198, "calibration/coverage@1%": 0.008994312622309198, "calibration/coverage@10%": 0.03634112035225049, "calibration/coverage@15%": 0.04924244740704501, "calibration/coverage@20%": 0.13952803938356165, "calibration/coverage@25%": 0.25559488136007824, "calibration/coverage@30%": 0.42172899339530334, "calibration/coverage@5%": 0.008994312622309198, "calibration/ece": 0.14414761784992106, "calibration/mean_confidence": 0.4889607813259758, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000390625, "completions/max_length": 614.0, "completions/max_terminated_length": 614.0, "completions/mean_length": 166.542578125, "completions/mean_terminated_length": 166.60614318847655, "completions/min_length": 39.2, "completions/min_terminated_length": 66.8, "epoch": 0.336, "grad_norm": 0.001344151794910431, "learning_rate": 1e-06, "loss": -0.0, "num_tokens": 345161230.0, "reward": 0.9194877028465271, "reward_std": 0.10516626238822938, "rewards/accuracy_reward": 0.5298828125, "rewards/brier_reward": 0.7511946082115173, "rewards/confidence_uniqueness_reward": 0.955797803401947, "rewards/format_reward": 0.99951171875, "rewards/frontier_aurc_reward": -0.002879744628444314, "rewards/frontier_ece_reward": 0.018249679170548917, "rewards/frontier_entropy_batch_reward": -0.17697776556015016, "rewards/volume_coverage_0": 4.766918682702626e-10, "rewards/volume_coverage_1": 4.766918682702626e-10, "rewards/volume_coverage_10": 2.4325521309070954e-09, "rewards/volume_coverage_15": 3.758264648448528e-09, "rewards/volume_coverage_20": 1.5596661688890846e-08, "rewards/volume_coverage_25": 2.5982084825987296e-08, "rewards/volume_coverage_5": 1.6899796385222431e-09, "signal/accuracy_reward/centered_abs_mean": 0.11619873046875, "signal/accuracy_reward/group_std_mean": 0.1555788427591324, "signal/accuracy_reward/group_zero_std_frac": 0.546875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.058099365234375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.058099365234375, "signal/advantage_abs_mean": 0.08048571348190307, "signal/advantage_pre_scale_abs_mean": 0.08048571348190307, "signal/advantage_pre_scale_std": 0.12967196404933928, "signal/advantage_std": 0.12967196404933928, "signal/brier_reward/centered_abs_mean": 0.17787760496139526, "signal/brier_reward/group_std_mean": 0.22367975115776062, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017787761986255646, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.017787761986255646, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012248115800321101, "signal/confidence_uniqueness_reward/group_std_mean": 0.016838539764285086, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012248115846887231, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012248115846887231, "signal/format_reward/centered_abs_mean": 0.000946044921875, "signal/format_reward/group_std_mean": 0.002762135770171881, "signal/format_reward/group_zero_std_frac": 0.984375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0024637745693325995, "signal/frontier_aurc_reward/group_std_mean": 0.0036766203120350838, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.079718335357029e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.079718335357029e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.04419417455792427, "signal/frontier_ece_reward/group_std_mean": 0.05992407724261284, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00441941749304533, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00441941749304533, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26389217674732207, "signal/frontier_entropy_batch_reward/group_std_mean": 0.340835964679718, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026389218494296075, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026389218494296075, "signal/volume_coverage_0/centered_abs_mean": 9.67608809396281e-10, "signal/volume_coverage_0/group_std_mean": 1.2326531816098107e-09, "signal/volume_coverage_0/group_zero_std_frac": 0.95625, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 9.676088168555918e-11, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 9.676088168555918e-11, "signal/volume_coverage_1/centered_abs_mean": 9.67608809396281e-10, "signal/volume_coverage_1/group_std_mean": 1.2326531816098107e-09, "signal/volume_coverage_1/group_zero_std_frac": 0.95625, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 9.676088168555918e-11, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 9.676088168555918e-11, "signal/volume_coverage_10/centered_abs_mean": 3.821803023704162e-09, "signal/volume_coverage_10/group_std_mean": 4.833791990982439e-09, "signal/volume_coverage_10/group_zero_std_frac": 0.85, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 3.8218032254525025e-10, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 3.8218032254525025e-10, "signal/volume_coverage_15/centered_abs_mean": 6.327961237992397e-09, "signal/volume_coverage_15/group_std_mean": 7.959293004539125e-09, "signal/volume_coverage_15/group_zero_std_frac": 0.85, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 6.327961339820664e-10, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 6.327961339820664e-10, "signal/volume_coverage_20/centered_abs_mean": 2.1903684760057884e-08, "signal/volume_coverage_20/group_std_mean": 2.784849743239781e-08, "signal/volume_coverage_20/group_zero_std_frac": 0.803125, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 2.1903685564102214e-09, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 2.1903685564102214e-09, "signal/volume_coverage_25/centered_abs_mean": 3.603442056410167e-08, "signal/volume_coverage_25/group_std_mean": 4.58625520782796e-08, "signal/volume_coverage_25/group_zero_std_frac": 0.8, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 3.6034418525454638e-09, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 3.6034418525454638e-09, "signal/volume_coverage_5/centered_abs_mean": 2.5051081816579446e-09, "signal/volume_coverage_5/group_std_mean": 3.187973132007249e-09, "signal/volume_coverage_5/group_zero_std_frac": 0.90625, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 2.5051084722241266e-10, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 2.5051084722241266e-10, "step": 105 }, { "calibration/aurc": 0.36664578478043053, "calibration/batch_distribution_entropy": 0.9767600246372599, "calibration/buffer_distribution_entropy": 0.99403102832482, "calibration/confidence_entropy": 0.48415713252297066, "calibration/coverage@0%": 0.009783206947162426, "calibration/coverage@1%": 0.009783206947162426, "calibration/coverage@10%": 0.037166707436399216, "calibration/coverage@15%": 0.13565924657534245, "calibration/coverage@20%": 0.23143881482387477, "calibration/coverage@25%": 0.35299275318003914, "calibration/coverage@30%": 0.4241415423189824, "calibration/coverage@5%": 0.0265991927592955, "calibration/ece": 0.13534885462049426, "calibration/mean_confidence": 0.45254701467405384, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 603.4, "completions/max_terminated_length": 603.4, "completions/mean_length": 170.66796875, "completions/mean_terminated_length": 170.71863403320313, "completions/min_length": 27.8, "completions/min_terminated_length": 70.8, "epoch": 0.352, "grad_norm": 0.0011555871460586786, "learning_rate": 1e-06, "loss": 0.0003, "num_tokens": 362169286.0, "reward": 0.8930242538452149, "reward_std": 0.10183399468660355, "rewards/accuracy_reward": 0.47333984375, "rewards/brier_reward": 0.7553327441215515, "rewards/confidence_uniqueness_reward": 0.9558841824531555, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.0032868479378521443, "rewards/frontier_ece_reward": 0.01634308509528637, "rewards/frontier_entropy_batch_reward": -0.16214127838611603, "rewards/volume_coverage_0": 8.19065734767449e-10, "rewards/volume_coverage_1": 8.19065734767449e-10, "rewards/volume_coverage_10": 1.2110763365248688e-09, "rewards/volume_coverage_15": 2.4374769481451397e-09, "rewards/volume_coverage_20": 1.8794657538623305e-08, "rewards/volume_coverage_25": 3.4976089935412345e-08, "rewards/volume_coverage_5": 1.1241983650413234e-09, "signal/accuracy_reward/centered_abs_mean": 0.115557861328125, "signal/accuracy_reward/group_std_mean": 0.1505853056907654, "signal/accuracy_reward/group_zero_std_frac": 0.575, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0577789306640625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0577789306640625, "signal/advantage_abs_mean": 0.07932186275720596, "signal/advantage_pre_scale_abs_mean": 0.07932186275720596, "signal/advantage_pre_scale_std": 0.12750938385725022, "signal/advantage_std": 0.12750938385725022, "signal/brier_reward/centered_abs_mean": 0.1764029860496521, "signal/brier_reward/group_std_mean": 0.22145257592201234, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01764029860496521, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01764029860496521, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012107652239501476, "signal/confidence_uniqueness_reward/group_std_mean": 0.016175054758787156, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001210765284486115, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001210765284486115, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.0027799428906291725, "signal/frontier_aurc_reward/group_std_mean": 0.004139097221195698, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.474928635114338e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.474928635114338e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.04073281139135361, "signal/frontier_ece_reward/group_std_mean": 0.05458591654896736, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004073281120508909, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004073281120508909, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2442895472049713, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3224704086780548, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02442895546555519, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02442895546555519, "signal/volume_coverage_0/centered_abs_mean": 1.2662086726344945e-09, "signal/volume_coverage_0/group_std_mean": 1.5743466100226478e-09, "signal/volume_coverage_0/group_zero_std_frac": 0.95, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 1.2662086505167702e-10, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 1.2662086505167702e-10, "signal/volume_coverage_1/centered_abs_mean": 1.2662086726344945e-09, "signal/volume_coverage_1/group_std_mean": 1.5743466100226478e-09, "signal/volume_coverage_1/group_zero_std_frac": 0.95, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 1.2662086505167702e-10, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 1.2662086505167702e-10, "signal/volume_coverage_10/centered_abs_mean": 1.9809725651520616e-09, "signal/volume_coverage_10/group_std_mean": 2.462229799216065e-09, "signal/volume_coverage_10/group_zero_std_frac": 0.95, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 1.9809724542164953e-10, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 1.9809724542164953e-10, "signal/volume_coverage_15/centered_abs_mean": 3.419152831696248e-09, "signal/volume_coverage_15/group_std_mean": 4.25322423253105e-09, "signal/volume_coverage_15/group_zero_std_frac": 0.89375, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 3.4191526680250885e-10, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 3.4191526680250885e-10, "signal/volume_coverage_20/centered_abs_mean": 1.8356375885986152e-08, "signal/volume_coverage_20/group_std_mean": 2.2897061674465035e-08, "signal/volume_coverage_20/group_zero_std_frac": 0.759375, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 1.835637512825894e-09, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 1.835637512825894e-09, "signal/volume_coverage_25/centered_abs_mean": 3.538144828230117e-08, "signal/volume_coverage_25/group_std_mean": 4.4106969121493475e-08, "signal/volume_coverage_25/group_zero_std_frac": 0.7, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 3.5381449908777896e-09, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 3.5381449908777896e-09, "signal/volume_coverage_5/centered_abs_mean": 1.8225652334558085e-09, "signal/volume_coverage_5/group_std_mean": 2.2654556466883415e-09, "signal/volume_coverage_5/group_zero_std_frac": 0.95, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 1.8225652779514656e-10, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 1.8225652779514656e-10, "step": 110 }, { "calibration/aurc": 0.3922542259570845, "calibration/batch_distribution_entropy": 0.9862624023754109, "calibration/buffer_distribution_entropy": 0.99757564403655, "calibration/confidence_entropy": 0.4880892356336729, "calibration/coverage@0%": 0.004296875, "calibration/coverage@1%": 0.004296875, "calibration/coverage@10%": 0.004296875, "calibration/coverage@15%": 0.008203125, "calibration/coverage@20%": 0.18046875, "calibration/coverage@25%": 0.243359375, "calibration/coverage@30%": 0.3765625, "calibration/coverage@5%": 0.004296875, "calibration/ece": 0.16619304690195144, "calibration/mean_confidence": 0.4991101124115054, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 498.2, "completions/max_terminated_length": 498.2, "completions/mean_length": 172.508984375, "completions/mean_terminated_length": 172.5423095703125, "completions/min_length": 32.4, "completions/min_terminated_length": 58.4, "epoch": 0.368, "grad_norm": 0.00148635427467525, "learning_rate": 1e-06, "loss": -0.0003, "num_tokens": 379001250.0, "reward": 0.8987860441207886, "reward_std": 0.10172230154275894, "rewards/accuracy_reward": 0.48759765625, "rewards/brier_reward": 0.7556616544723511, "rewards/confidence_uniqueness_reward": 0.956497323513031, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.003505715122446418, "rewards/frontier_ece_reward": 0.015234233625233174, "rewards/frontier_entropy_batch_reward": -0.1756180554628372, "rewards/volume_coverage_0": 9.202538037156315e-13, "rewards/volume_coverage_1": 9.202538037156315e-13, "rewards/volume_coverage_10": -2.2965750828829455e-12, "rewards/volume_coverage_15": 9.785381289983963e-11, "rewards/volume_coverage_20": 3.1048119630838755e-10, "rewards/volume_coverage_25": 6.026713239570825e-10, "rewards/volume_coverage_5": 9.202538037156315e-13, "signal/accuracy_reward/centered_abs_mean": 0.110748291015625, "signal/accuracy_reward/group_std_mean": 0.15012845695018767, "signal/accuracy_reward/group_zero_std_frac": 0.55625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0553741455078125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0553741455078125, "signal/advantage_abs_mean": 0.07711823582649231, "signal/advantage_pre_scale_abs_mean": 0.07711823582649231, "signal/advantage_pre_scale_std": 0.12509591430425643, "signal/advantage_std": 0.12509591430425643, "signal/brier_reward/centered_abs_mean": 0.1711456745862961, "signal/brier_reward/group_std_mean": 0.21479713320732116, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017114568129181863, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.017114568129181863, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012594586797058582, "signal/confidence_uniqueness_reward/group_std_mean": 0.016726733930408955, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012594586703926324, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012594586703926324, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.003213394992053509, "signal/frontier_aurc_reward/group_std_mean": 0.004921545553952455, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.016743769170716e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.016743769170716e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.03740856498479843, "signal/frontier_ece_reward/group_std_mean": 0.049930807948112485, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0037408565636724233, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0037408565636724233, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2576330900192261, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3383022129535675, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.025763309746980666, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.025763309746980666, "signal/volume_coverage_0/centered_abs_mean": 9.8125767233892e-11, "signal/volume_coverage_0/group_std_mean": 1.2552429720646962e-10, "signal/volume_coverage_0/group_zero_std_frac": 1.0, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 9.812575838680226e-12, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 9.812575838680226e-12, "signal/volume_coverage_1/centered_abs_mean": 9.8125767233892e-11, "signal/volume_coverage_1/group_std_mean": 1.2552429720646962e-10, "signal/volume_coverage_1/group_zero_std_frac": 1.0, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 9.812575838680226e-12, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 9.812575838680226e-12, "signal/volume_coverage_10/centered_abs_mean": 4.5797430708871544e-10, "signal/volume_coverage_10/group_std_mean": 5.857822844845817e-10, "signal/volume_coverage_10/group_zero_std_frac": 0.965625, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 4.579742843638379e-11, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 4.579742843638379e-11, "signal/volume_coverage_15/centered_abs_mean": 1.214225502088606e-09, "signal/volume_coverage_15/group_std_mean": 1.5410452643138672e-09, "signal/volume_coverage_15/group_zero_std_frac": 0.925, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 1.214225457159268e-10, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 1.214225457159268e-10, "signal/volume_coverage_20/centered_abs_mean": 1.8293037196137618e-09, "signal/volume_coverage_20/group_std_mean": 2.3235737112159425e-09, "signal/volume_coverage_20/group_zero_std_frac": 0.9125, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 1.8293037154504254e-10, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 1.8293037154504254e-10, "signal/volume_coverage_25/centered_abs_mean": 2.8839780452649675e-09, "signal/volume_coverage_25/group_std_mean": 3.655950098213623e-09, "signal/volume_coverage_25/group_zero_std_frac": 0.8875, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 2.8839779661615774e-10, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 2.8839779661615774e-10, "signal/volume_coverage_5/centered_abs_mean": 9.8125767233892e-11, "signal/volume_coverage_5/group_std_mean": 1.2552429720646962e-10, "signal/volume_coverage_5/group_zero_std_frac": 1.0, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 9.812575838680226e-12, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 9.812575838680226e-12, "step": 115 }, { "calibration/aurc": 0.3513991983148937, "calibration/batch_distribution_entropy": 0.9860781630180767, "calibration/buffer_distribution_entropy": 0.998864146535581, "calibration/confidence_entropy": 0.4976471261852248, "calibration/coverage@0%": 0.022265625, "calibration/coverage@1%": 0.022265625, "calibration/coverage@10%": 0.072265625, "calibration/coverage@15%": 0.182421875, "calibration/coverage@20%": 0.242578125, "calibration/coverage@25%": 0.31484375, "calibration/coverage@30%": 0.3953125, "calibration/coverage@5%": 0.044140625, "calibration/ece": 0.14944111182549608, "calibration/mean_confidence": 0.46471985469835875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000390625, "completions/max_length": 627.0, "completions/max_terminated_length": 627.0, "completions/mean_length": 171.5482421875, "completions/mean_terminated_length": 171.61572875976563, "completions/min_length": 27.0, "completions/min_terminated_length": 68.6, "epoch": 0.384, "grad_norm": 0.0012787673622369766, "learning_rate": 1e-06, "loss": -0.0001, "num_tokens": 395614416.0, "reward": 0.9182936072349548, "reward_std": 0.10047120004892349, "rewards/accuracy_reward": 0.52353515625, "rewards/brier_reward": 0.7724483251571655, "rewards/confidence_uniqueness_reward": 0.9581897497177124, "rewards/format_reward": 0.999609375, "rewards/frontier_aurc_reward": -0.0029781535267829895, "rewards/frontier_ece_reward": 0.01786994356662035, "rewards/frontier_entropy_batch_reward": -0.18092238306999206, "rewards/volume_coverage_0": 4.383281107869785e-11, "rewards/volume_coverage_1": 4.383281107869785e-11, "rewards/volume_coverage_10": 4.383281107869785e-11, "rewards/volume_coverage_15": 8.713083666025967e-11, "rewards/volume_coverage_20": 4.434830815948365e-10, "rewards/volume_coverage_25": 6.633161402959387e-10, "rewards/volume_coverage_5": 4.383281107869785e-11, "signal/accuracy_reward/centered_abs_mean": 0.112371826171875, "signal/accuracy_reward/group_std_mean": 0.14683832228183746, "signal/accuracy_reward/group_zero_std_frac": 0.584375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0561859130859375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0561859130859375, "signal/advantage_abs_mean": 0.07782022804021835, "signal/advantage_pre_scale_abs_mean": 0.07782022804021835, "signal/advantage_pre_scale_std": 0.12562936544418335, "signal/advantage_std": 0.12562936544418335, "signal/brier_reward/centered_abs_mean": 0.1601964920759201, "signal/brier_reward/group_std_mean": 0.2036239355802536, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016019649058580398, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.016019649058580398, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012591696158051491, "signal/confidence_uniqueness_reward/group_std_mean": 0.0170934084802866, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001259169657714665, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001259169657714665, "signal/format_reward/centered_abs_mean": 0.0007568359375, "signal/format_reward/group_std_mean": 0.0022097086533904076, "signal/format_reward/group_zero_std_frac": 0.9875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00037841796875, "signal/frontier_aurc_reward/centered_abs_mean": 0.0033347518648952244, "signal/frontier_aurc_reward/group_std_mean": 0.0051669498905539514, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.168439918430522e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.168439918430522e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.034313207119703294, "signal/frontier_ece_reward/group_std_mean": 0.04498266875743866, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.003431320795789361, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.003431320795789361, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2615587115287781, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3394892454147339, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026155871525406837, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026155871525406837, "signal/volume_coverage_0/centered_abs_mean": 1.3037857460318492e-10, "signal/volume_coverage_0/group_std_mean": 1.6798773205817242e-10, "signal/volume_coverage_0/group_zero_std_frac": 1.0, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 1.3037858639930455e-11, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 1.3037858639930455e-11, "signal/volume_coverage_1/centered_abs_mean": 1.3037857460318492e-10, "signal/volume_coverage_1/group_std_mean": 1.6798773205817242e-10, "signal/volume_coverage_1/group_zero_std_frac": 1.0, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 1.3037858639930455e-11, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 1.3037858639930455e-11, "signal/volume_coverage_10/centered_abs_mean": 1.3037857460318492e-10, "signal/volume_coverage_10/group_std_mean": 1.6798773205817242e-10, "signal/volume_coverage_10/group_zero_std_frac": 1.0, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 1.3037858639930455e-11, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 1.3037858639930455e-11, "signal/volume_coverage_15/centered_abs_mean": 3.371282447650614e-10, "signal/volume_coverage_15/group_std_mean": 4.31999752636969e-10, "signal/volume_coverage_15/group_zero_std_frac": 1.0, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 3.371282558672917e-11, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 3.371282558672917e-11, "signal/volume_coverage_20/centered_abs_mean": 1.909953942225151e-09, "signal/volume_coverage_20/group_std_mean": 2.4386795027808716e-09, "signal/volume_coverage_20/group_zero_std_frac": 0.953125, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 1.9099540005118598e-10, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 1.9099540005118598e-10, "signal/volume_coverage_25/centered_abs_mean": 3.69576863834542e-09, "signal/volume_coverage_25/group_std_mean": 4.7769367295114055e-09, "signal/volume_coverage_25/group_zero_std_frac": 0.903125, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 3.69576855924203e-10, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 3.69576855924203e-10, "signal/volume_coverage_5/centered_abs_mean": 1.3037857460318492e-10, "signal/volume_coverage_5/group_std_mean": 1.6798773205817242e-10, "signal/volume_coverage_5/group_zero_std_frac": 1.0, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 1.3037858639930455e-11, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 1.3037858639930455e-11, "step": 120 }, { "calibration/aurc": 0.4437287155933821, "calibration/batch_distribution_entropy": 0.9884368812661011, "calibration/buffer_distribution_entropy": 0.9989521601251751, "calibration/confidence_entropy": 0.5128253874437341, "calibration/coverage@0%": 0.002740502450980392, "calibration/coverage@1%": 0.002740502450980392, "calibration/coverage@10%": 0.005865502450980392, "calibration/coverage@15%": 0.005865502450980392, "calibration/coverage@20%": 0.007037377450980392, "calibration/coverage@25%": 0.046881127450980394, "calibration/coverage@30%": 0.13672947303921568, "calibration/coverage@5%": 0.002740502450980392, "calibration/ece": 0.15523985953050676, "calibration/mean_confidence": 0.5022496927820296, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 533.0, "completions/max_terminated_length": 533.0, "completions/mean_length": 167.06259765625, "completions/mean_terminated_length": 167.11257629394532, "completions/min_length": 43.8, "completions/min_terminated_length": 69.2, "epoch": 0.4, "grad_norm": 0.001277610776014626, "learning_rate": 1e-06, "loss": 0.0001, "num_tokens": 412361585.0, "reward": 0.9021111249923706, "reward_std": 0.10790342837572098, "rewards/accuracy_reward": 0.49384765625, "rewards/brier_reward": 0.7573448777198791, "rewards/confidence_uniqueness_reward": 0.9609808683395386, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.0036756881512701512, "rewards/frontier_ece_reward": 0.013616420142352582, "rewards/frontier_entropy_batch_reward": -0.17814513444900512, "rewards/volume_coverage_0": 1.0085748902710634e-10, "rewards/volume_coverage_1": 1.0085748902710634e-10, "rewards/volume_coverage_10": 6.06503069988662e-11, "rewards/volume_coverage_15": -2.9825425462703946e-11, "rewards/volume_coverage_20": 8.66519167530555e-10, "rewards/volume_coverage_25": 6.701753374471764e-10, "rewards/volume_coverage_5": 1.0085748902710634e-10, "signal/accuracy_reward/centered_abs_mean": 0.126629638671875, "signal/accuracy_reward/group_std_mean": 0.1655414193868637, "signal/accuracy_reward/group_zero_std_frac": 0.525, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0633148193359375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0633148193359375, "signal/advantage_abs_mean": 0.0843367651104927, "signal/advantage_pre_scale_abs_mean": 0.0843367651104927, "signal/advantage_pre_scale_std": 0.13356164544820787, "signal/advantage_std": 0.13356164544820787, "signal/brier_reward/centered_abs_mean": 0.16698363721370696, "signal/brier_reward/group_std_mean": 0.21023752689361572, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01669836454093456, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01669836454093456, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012285609915852546, "signal/confidence_uniqueness_reward/group_std_mean": 0.015908705443143843, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012285609962418675, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012285609962418675, "signal/format_reward/centered_abs_mean": 0.000555419921875, "signal/format_reward/group_std_mean": 0.0013209730386734009, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002777099609375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002777099609375, "signal/frontier_aurc_reward/centered_abs_mean": 0.003910475643351674, "signal/frontier_aurc_reward/group_std_mean": 0.00609401436522603, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.888094772468321e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.888094772468321e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.03086501657962799, "signal/frontier_ece_reward/group_std_mean": 0.0409327894449234, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.003086501592770219, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.003086501592770219, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26057218909263613, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3405936896800995, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026057218760252, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026057218760252, "signal/volume_coverage_0/centered_abs_mean": 8.354765185236701e-10, "signal/volume_coverage_0/group_std_mean": 1.0910699621557996e-09, "signal/volume_coverage_0/group_zero_std_frac": 0.965625, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 8.354765455853563e-11, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 8.354765455853563e-11, "signal/volume_coverage_1/centered_abs_mean": 8.354765185236701e-10, "signal/volume_coverage_1/group_std_mean": 1.0910699621557996e-09, "signal/volume_coverage_1/group_zero_std_frac": 0.965625, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 8.354765455853563e-11, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 8.354765455853563e-11, "signal/volume_coverage_10/centered_abs_mean": 9.910879533725493e-10, "signal/volume_coverage_10/group_std_mean": 1.2953009054239572e-09, "signal/volume_coverage_10/group_zero_std_frac": 0.959375, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 9.910879582297749e-11, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 9.910879582297749e-11, "signal/volume_coverage_15/centered_abs_mean": 2.1594268345692667e-09, "signal/volume_coverage_15/group_std_mean": 2.8284628694752454e-09, "signal/volume_coverage_15/group_zero_std_frac": 0.925, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 2.1594270364910796e-10, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 2.1594270364910796e-10, "signal/volume_coverage_20/centered_abs_mean": 3.882546484312854e-09, "signal/volume_coverage_20/group_std_mean": 5.0664275552669835e-09, "signal/volume_coverage_20/group_zero_std_frac": 0.875, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 3.8825464718228453e-10, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 3.8825464718228453e-10, "signal/volume_coverage_25/centered_abs_mean": 1.0074538070448113e-08, "signal/volume_coverage_25/group_std_mean": 1.3182782165888085e-08, "signal/volume_coverage_25/group_zero_std_frac": 0.85, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 1.0074538168980408e-09, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 1.0074538168980408e-09, "signal/volume_coverage_5/centered_abs_mean": 8.354765185236701e-10, "signal/volume_coverage_5/group_std_mean": 1.0910699621557996e-09, "signal/volume_coverage_5/group_zero_std_frac": 0.965625, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 8.354765455853563e-11, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 8.354765455853563e-11, "step": 125 }, { "calibration/aurc": 0.35445632284315787, "calibration/batch_distribution_entropy": 0.9813028422811737, "calibration/buffer_distribution_entropy": 0.9989037529411805, "calibration/confidence_entropy": 0.5254088005465698, "calibration/coverage@0%": 0.00078125, "calibration/coverage@1%": 0.00078125, "calibration/coverage@10%": 0.005078125, "calibration/coverage@15%": 0.0140625, "calibration/coverage@20%": 0.07722249508840864, "calibration/coverage@25%": 0.16574088285854618, "calibration/coverage@30%": 0.2815293467583497, "calibration/coverage@5%": 0.00078125, "calibration/ece": 0.09861364113925782, "calibration/mean_confidence": 0.5204927474029243, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 465.2, "completions/max_terminated_length": 465.2, "completions/mean_length": 166.2703125, "completions/mean_terminated_length": 166.31866149902345, "completions/min_length": 38.2, "completions/min_terminated_length": 63.6, "epoch": 0.416, "grad_norm": 0.0013528935378417373, "learning_rate": 1e-06, "loss": -0.0, "num_tokens": 428945377.0, "reward": 0.9096973180770874, "reward_std": 0.1019922986626625, "rewards/accuracy_reward": 0.5060546875, "rewards/brier_reward": 0.7639304637908936, "rewards/confidence_uniqueness_reward": 0.96273832321167, "rewards/format_reward": 0.999609375, "rewards/frontier_aurc_reward": -0.003395315073430538, "rewards/frontier_ece_reward": 0.013873597048223019, "rewards/frontier_entropy_batch_reward": -0.17146520018577577, "rewards/volume_coverage_0": 7.473216906417335e-11, "rewards/volume_coverage_1": 7.473216906417335e-11, "rewards/volume_coverage_10": 7.473216906417335e-11, "rewards/volume_coverage_15": 7.473216906417335e-11, "rewards/volume_coverage_20": 7.026016463074214e-10, "rewards/volume_coverage_25": 1.0181003540221667e-09, "rewards/volume_coverage_5": 7.473216906417335e-11, "signal/accuracy_reward/centered_abs_mean": 0.11749267578125, "signal/accuracy_reward/group_std_mean": 0.15149664878845215, "signal/accuracy_reward/group_zero_std_frac": 0.578125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.058746337890625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.058746337890625, "signal/advantage_abs_mean": 0.07992852181196212, "signal/advantage_pre_scale_abs_mean": 0.07992852181196212, "signal/advantage_pre_scale_std": 0.1282731533050537, "signal/advantage_std": 0.1282731533050537, "signal/brier_reward/centered_abs_mean": 0.1657171666622162, "signal/brier_reward/group_std_mean": 0.20892676711082458, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016571716964244844, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.016571716964244844, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012295385263860226, "signal/confidence_uniqueness_reward/group_std_mean": 0.016405154950916767, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001229538512416184, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001229538512416184, "signal/format_reward/centered_abs_mean": 0.00074462890625, "signal/format_reward/group_std_mean": 0.0018734002020210027, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000372314453125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000372314453125, "signal/frontier_aurc_reward/centered_abs_mean": 0.003926783287897706, "signal/frontier_aurc_reward/group_std_mean": 0.0064892381429672245, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.908479022560641e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.908479022560641e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.02886553555727005, "signal/frontier_ece_reward/group_std_mean": 0.03819820955395699, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0028865536209195853, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0028865536209195853, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.25341747999191283, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3332586348056793, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02534174807369709, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02534174807369709, "signal/volume_coverage_0/centered_abs_mean": 1.8584461053405832e-10, "signal/volume_coverage_0/group_std_mean": 2.3160315193448966e-10, "signal/volume_coverage_0/group_zero_std_frac": 1.0, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 1.858446053298879e-11, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 1.858446053298879e-11, "signal/volume_coverage_1/centered_abs_mean": 1.8584461053405832e-10, "signal/volume_coverage_1/group_std_mean": 2.3160315193448966e-10, "signal/volume_coverage_1/group_zero_std_frac": 1.0, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 1.858446053298879e-11, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 1.858446053298879e-11, "signal/volume_coverage_10/centered_abs_mean": 1.8584461053405832e-10, "signal/volume_coverage_10/group_std_mean": 2.3160315193448966e-10, "signal/volume_coverage_10/group_zero_std_frac": 1.0, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 1.858446053298879e-11, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 1.858446053298879e-11, "signal/volume_coverage_15/centered_abs_mean": 1.8584461053405832e-10, "signal/volume_coverage_15/group_std_mean": 2.3160315193448966e-10, "signal/volume_coverage_15/group_zero_std_frac": 1.0, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 1.858446053298879e-11, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 1.858446053298879e-11, "signal/volume_coverage_20/centered_abs_mean": 1.476313882253777e-09, "signal/volume_coverage_20/group_std_mean": 1.8577961946597554e-09, "signal/volume_coverage_20/group_zero_std_frac": 0.96875, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 1.4763138836415557e-10, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 1.4763138836415557e-10, "signal/volume_coverage_25/centered_abs_mean": 2.8416659803021104e-09, "signal/volume_coverage_25/group_std_mean": 3.5887031346604203e-09, "signal/volume_coverage_25/group_zero_std_frac": 0.896875, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 2.8416659927921194e-10, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 2.8416659927921194e-10, "signal/volume_coverage_5/centered_abs_mean": 1.8584461053405832e-10, "signal/volume_coverage_5/group_std_mean": 2.3160315193448966e-10, "signal/volume_coverage_5/group_zero_std_frac": 1.0, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 1.858446053298879e-11, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 1.858446053298879e-11, "step": 130 }, { "calibration/aurc": 0.2953629301468977, "calibration/batch_distribution_entropy": 0.9862223812365766, "calibration/buffer_distribution_entropy": 0.9991731870187508, "calibration/confidence_entropy": 0.4962810437639968, "calibration/coverage@0%": 0.00546875, "calibration/coverage@1%": 0.00546875, "calibration/coverage@10%": 0.0664528803816047, "calibration/coverage@15%": 0.11689930895303327, "calibration/coverage@20%": 0.30368838674168297, "calibration/coverage@25%": 0.3994855369373777, "calibration/coverage@30%": 0.4749120902641879, "calibration/coverage@5%": 0.00546875, "calibration/ece": 0.11536401207599001, "calibration/mean_confidence": 0.5435637005194328, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000390625, "completions/max_length": 522.8, "completions/max_terminated_length": 522.8, "completions/mean_length": 164.665234375, "completions/mean_terminated_length": 164.7296905517578, "completions/min_length": 14.6, "completions/min_terminated_length": 68.0, "epoch": 0.432, "grad_norm": 0.001812662580050528, "learning_rate": 1e-06, "loss": -0.0001, "num_tokens": 445645885.0, "reward": 0.9282691955566407, "reward_std": 0.09776676595211028, "rewards/accuracy_reward": 0.5431640625, "rewards/brier_reward": 0.7781499981880188, "rewards/confidence_uniqueness_reward": 0.9612111330032349, "rewards/format_reward": 0.99951171875, "rewards/frontier_aurc_reward": -0.002956994064152241, "rewards/frontier_ece_reward": 0.016237000189721584, "rewards/frontier_entropy_batch_reward": -0.1859154611825943, "rewards/volume_coverage_0": 1.439715041628986e-10, "rewards/volume_coverage_1": 1.439715041628986e-10, "rewards/volume_coverage_10": 3.679456433980377e-10, "rewards/volume_coverage_15": 5.91844750985615e-10, "rewards/volume_coverage_20": 1.4051429186867637e-09, "rewards/volume_coverage_25": 6.444645928027626e-09, "rewards/volume_coverage_5": 1.439715041628986e-10, "signal/accuracy_reward/centered_abs_mean": 0.10758056640625, "signal/accuracy_reward/group_std_mean": 0.1392093226313591, "signal/accuracy_reward/group_zero_std_frac": 0.6125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.053790283203125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.053790283203125, "signal/advantage_abs_mean": 0.07623622268438339, "signal/advantage_pre_scale_abs_mean": 0.07623622268438339, "signal/advantage_pre_scale_std": 0.12360891848802566, "signal/advantage_std": 0.12360891848802566, "signal/brier_reward/centered_abs_mean": 0.15707127153873443, "signal/brier_reward/group_std_mean": 0.19963068664073944, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01570712644606829, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01570712644606829, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012181778438389301, "signal/confidence_uniqueness_reward/group_std_mean": 0.01665004901587963, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001218177890405059, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001218177890405059, "signal/format_reward/centered_abs_mean": 0.000946044921875, "signal/format_reward/group_std_mean": 0.0027621358167380095, "signal/format_reward/group_zero_std_frac": 0.984375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0038814083207398655, "signal/frontier_aurc_reward/group_std_mean": 0.006444942206144333, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.8517604591324924e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.8517604591324924e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.027643512561917306, "signal/frontier_ece_reward/group_std_mean": 0.036360897868871686, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002764351200312376, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002764351200312376, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26680874824523926, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3425988554954529, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026680874824523925, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026680874824523925, "signal/volume_coverage_0/centered_abs_mean": 3.116232355493409e-10, "signal/volume_coverage_0/group_std_mean": 3.9976172094835506e-10, "signal/volume_coverage_0/group_zero_std_frac": 1.0, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 3.116232358962856e-11, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 3.116232358962856e-11, "signal/volume_coverage_1/centered_abs_mean": 3.116232355493409e-10, "signal/volume_coverage_1/group_std_mean": 3.9976172094835506e-10, "signal/volume_coverage_1/group_zero_std_frac": 1.0, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 3.116232358962856e-11, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 3.116232358962856e-11, "signal/volume_coverage_10/centered_abs_mean": 8.034230308817669e-10, "signal/volume_coverage_10/group_std_mean": 1.0218955376339877e-09, "signal/volume_coverage_10/group_zero_std_frac": 0.96875, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 8.034230242898177e-11, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 8.034230242898177e-11, "signal/volume_coverage_15/centered_abs_mean": 1.9115799082536356e-09, "signal/volume_coverage_15/group_std_mean": 2.4280549376642924e-09, "signal/volume_coverage_15/group_zero_std_frac": 0.93125, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 1.9115798128438444e-10, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 1.9115798128438444e-10, "signal/volume_coverage_20/centered_abs_mean": 3.9429249309996806e-09, "signal/volume_coverage_20/group_std_mean": 5.0118225125572735e-09, "signal/volume_coverage_20/group_zero_std_frac": 0.828125, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 3.942925042021983e-10, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 3.942925042021983e-10, "signal/volume_coverage_25/centered_abs_mean": 1.0928629468054396e-08, "signal/volume_coverage_25/group_std_mean": 1.3933071763005955e-08, "signal/volume_coverage_25/group_zero_std_frac": 0.6125, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 1.0928629334827632e-09, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 1.0928629334827632e-09, "signal/volume_coverage_5/centered_abs_mean": 3.116232355493409e-10, "signal/volume_coverage_5/group_std_mean": 3.9976172094835506e-10, "signal/volume_coverage_5/group_zero_std_frac": 1.0, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 3.116232358962856e-11, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 3.116232358962856e-11, "step": 135 }, { "calibration/aurc": 0.30513196272360465, "calibration/batch_distribution_entropy": 0.9789796952184366, "calibration/buffer_distribution_entropy": 0.9992924953685062, "calibration/confidence_entropy": 0.5081590689078211, "calibration/coverage@0%": 0.001953125, "calibration/coverage@1%": 0.001953125, "calibration/coverage@10%": 0.036328125, "calibration/coverage@15%": 0.1, "calibration/coverage@20%": 0.208984375, "calibration/coverage@25%": 0.295703125, "calibration/coverage@30%": 0.52109375, "calibration/coverage@5%": 0.001953125, "calibration/ece": 0.12843204989485463, "calibration/mean_confidence": 0.5509481067455588, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 590.2, "completions/max_terminated_length": 590.2, "completions/mean_length": 170.64814453125, "completions/mean_terminated_length": 170.64814453125, "completions/min_length": 63.6, "completions/min_terminated_length": 63.6, "epoch": 0.448, "grad_norm": 0.001530295587144792, "learning_rate": 1e-06, "loss": 0.0002, "num_tokens": 462346122.0, "reward": 0.9137397766113281, "reward_std": 0.09540319591760635, "rewards/accuracy_reward": 0.51552734375, "rewards/brier_reward": 0.770829725265503, "rewards/confidence_uniqueness_reward": 0.9601699829101562, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.0034271135926246645, "rewards/frontier_ece_reward": 0.012508058547973632, "rewards/frontier_entropy_batch_reward": -0.18331859707832338, "rewards/volume_coverage_0": 1.893049966783522e-10, "rewards/volume_coverage_1": 1.893049966783522e-10, "rewards/volume_coverage_10": 1.893049966783522e-10, "rewards/volume_coverage_15": 2.776873092580345e-10, "rewards/volume_coverage_20": 1.1103639230292117e-09, "rewards/volume_coverage_25": 1.2780476343898783e-08, "rewards/volume_coverage_5": 1.893049966783522e-10, "signal/accuracy_reward/centered_abs_mean": 0.106903076171875, "signal/accuracy_reward/group_std_mean": 0.1370233952999115, "signal/accuracy_reward/group_zero_std_frac": 0.621875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0534515380859375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0534515380859375, "signal/advantage_abs_mean": 0.07518869191408158, "signal/advantage_pre_scale_abs_mean": 0.07518869191408158, "signal/advantage_pre_scale_std": 0.1213814526796341, "signal/advantage_std": 0.1213814526796341, "signal/brier_reward/centered_abs_mean": 0.16107324361801148, "signal/brier_reward/group_std_mean": 0.20332952439785004, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016107324883341788, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.016107324883341788, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.011767148971557617, "signal/confidence_uniqueness_reward/group_std_mean": 0.014688951708376408, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001176714920438826, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001176714920438826, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0038240184541791676, "signal/frontier_aurc_reward/group_std_mean": 0.006383162178099156, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.780023155035451e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.780023155035451e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.02547091469168663, "signal/frontier_ece_reward/group_std_mean": 0.03375823795795441, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002547091618180275, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002547091618180275, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2633515000343323, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3400727391242981, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026335151121020316, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026335151121020316, "signal/volume_coverage_0/centered_abs_mean": 6.843173694370819e-10, "signal/volume_coverage_0/group_std_mean": 8.689262309680146e-10, "signal/volume_coverage_0/group_zero_std_frac": 1.0, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 6.843173597226304e-11, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 6.843173597226304e-11, "signal/volume_coverage_1/centered_abs_mean": 6.843173694370819e-10, "signal/volume_coverage_1/group_std_mean": 8.689262309680146e-10, "signal/volume_coverage_1/group_zero_std_frac": 1.0, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 6.843173597226304e-11, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 6.843173597226304e-11, "signal/volume_coverage_10/centered_abs_mean": 6.843173694370819e-10, "signal/volume_coverage_10/group_std_mean": 8.689262309680146e-10, "signal/volume_coverage_10/group_zero_std_frac": 1.0, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 6.843173597226304e-11, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 6.843173597226304e-11, "signal/volume_coverage_15/centered_abs_mean": 1.210085631697666e-09, "signal/volume_coverage_15/group_std_mean": 1.518687620105652e-09, "signal/volume_coverage_15/group_zero_std_frac": 0.984375, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 1.2100856011665328e-10, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 1.2100856011665328e-10, "signal/volume_coverage_20/centered_abs_mean": 4.973534961649761e-09, "signal/volume_coverage_20/group_std_mean": 6.1977475152019675e-09, "signal/volume_coverage_20/group_zero_std_frac": 0.884375, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 4.97353498662978e-10, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 4.97353498662978e-10, "signal/volume_coverage_25/centered_abs_mean": 2.4731123815513456e-08, "signal/volume_coverage_25/group_std_mean": 3.0804845252419e-08, "signal/volume_coverage_25/group_zero_std_frac": 0.55, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 2.473112403755806e-09, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 2.473112403755806e-09, "signal/volume_coverage_5/centered_abs_mean": 6.843173694370819e-10, "signal/volume_coverage_5/group_std_mean": 8.689262309680146e-10, "signal/volume_coverage_5/group_zero_std_frac": 1.0, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 6.843173597226304e-11, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 6.843173597226304e-11, "step": 140 }, { "calibration/aurc": 0.4182242037120104, "calibration/batch_distribution_entropy": 0.9825868175253107, "calibration/buffer_distribution_entropy": 0.9992167885447563, "calibration/confidence_entropy": 0.528219422297434, "calibration/coverage@0%": 0.001171875, "calibration/coverage@1%": 0.001171875, "calibration/coverage@10%": 0.001171875, "calibration/coverage@15%": 0.002734375, "calibration/coverage@20%": 0.07429519324853229, "calibration/coverage@25%": 0.11929504036203523, "calibration/coverage@30%": 0.24838169642857144, "calibration/coverage@5%": 0.001171875, "calibration/ece": 0.13537476420506195, "calibration/mean_confidence": 0.48931586147542616, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0005859375, "completions/max_length": 549.2, "completions/max_terminated_length": 549.2, "completions/mean_length": 173.40908203125, "completions/mean_terminated_length": 173.5105773925781, "completions/min_length": 16.0, "completions/min_terminated_length": 72.8, "epoch": 0.464, "grad_norm": 0.0009655957692302763, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 479292647.0, "reward": 0.8837283730506897, "reward_std": 0.09050512313842773, "rewards/accuracy_reward": 0.460546875, "rewards/brier_reward": 0.7583501458168029, "rewards/confidence_uniqueness_reward": 0.957415759563446, "rewards/format_reward": 0.99931640625, "rewards/frontier_aurc_reward": -0.003975970856845379, "rewards/frontier_ece_reward": 0.009838496148586274, "rewards/frontier_entropy_batch_reward": -0.1871403008699417, "rewards/volume_coverage_0": 3.040359616846011e-10, "rewards/volume_coverage_1": 3.040359616846011e-10, "rewards/volume_coverage_10": 9.855517194012898e-10, "rewards/volume_coverage_15": 2.2260005616558944e-09, "rewards/volume_coverage_20": 4.769506212198848e-09, "rewards/volume_coverage_25": 2.9589145800201777e-08, "rewards/volume_coverage_5": 3.040359616846011e-10, "signal/accuracy_reward/centered_abs_mean": 0.08966064453125, "signal/accuracy_reward/group_std_mean": 0.12274214625358582, "signal/accuracy_reward/group_zero_std_frac": 0.6375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.044830322265625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.044830322265625, "signal/advantage_abs_mean": 0.06799145862460136, "signal/advantage_pre_scale_abs_mean": 0.06799145862460136, "signal/advantage_pre_scale_std": 0.11409917026758194, "signal/advantage_std": 0.11409917026758194, "signal/brier_reward/centered_abs_mean": 0.15883066058158873, "signal/brier_reward/group_std_mean": 0.20072786509990692, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01588306687772274, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01588306687772274, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.01368649173527956, "signal/confidence_uniqueness_reward/group_std_mean": 0.019033579528331755, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013686491874977946, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013686491874977946, "signal/format_reward/centered_abs_mean": 0.001312255859375, "signal/format_reward/group_std_mean": 0.0035306816920638085, "signal/format_reward/group_zero_std_frac": 0.98125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006561279296875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0006561279296875, "signal/frontier_aurc_reward/centered_abs_mean": 0.0037599447183310985, "signal/frontier_aurc_reward/group_std_mean": 0.006334328558295965, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.6999311598483474e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.6999311598483474e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.02307127006351948, "signal/frontier_ece_reward/group_std_mean": 0.03083600252866745, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002307126997038722, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002307126997038722, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2677737444639206, "signal/frontier_entropy_batch_reward/group_std_mean": 0.345480477809906, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026777375489473343, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026777375489473343, "signal/volume_coverage_0/centered_abs_mean": 4.1880419132667155e-10, "signal/volume_coverage_0/group_std_mean": 5.327342095240439e-10, "signal/volume_coverage_0/group_zero_std_frac": 1.0, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 4.1880420312279123e-11, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 4.1880420312279123e-11, "signal/volume_coverage_1/centered_abs_mean": 4.1880419132667155e-10, "signal/volume_coverage_1/group_std_mean": 5.327342095240439e-10, "signal/volume_coverage_1/group_zero_std_frac": 1.0, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 4.1880420312279123e-11, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 4.1880420312279123e-11, "signal/volume_coverage_10/centered_abs_mean": 1.0768645974934543e-09, "signal/volume_coverage_10/group_std_mean": 1.392269344258068e-09, "signal/volume_coverage_10/group_zero_std_frac": 0.9625, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 1.0768645974934543e-10, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 1.0768645974934543e-10, "signal/volume_coverage_15/centered_abs_mean": 3.6891373511593883e-09, "signal/volume_coverage_15/group_std_mean": 4.7468447395004885e-09, "signal/volume_coverage_15/group_zero_std_frac": 0.840625, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 3.6891373567105034e-10, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 3.6891373567105034e-10, "signal/volume_coverage_20/centered_abs_mean": 9.620456786763043e-09, "signal/volume_coverage_20/group_std_mean": 1.240889400122569e-08, "signal/volume_coverage_20/group_zero_std_frac": 0.575, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 9.620456764558582e-10, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 9.620456764558582e-10, "signal/volume_coverage_25/centered_abs_mean": 6.344143557157623e-08, "signal/volume_coverage_25/group_std_mean": 8.170086047698533e-08, "signal/volume_coverage_25/group_zero_std_frac": 0.190625, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 6.344143699266169e-09, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 6.344143699266169e-09, "signal/volume_coverage_5/centered_abs_mean": 4.1880419132667155e-10, "signal/volume_coverage_5/group_std_mean": 5.327342095240439e-10, "signal/volume_coverage_5/group_zero_std_frac": 1.0, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 4.1880420312279123e-11, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 4.1880420312279123e-11, "step": 145 }, { "calibration/aurc": 0.3298085505781082, "calibration/batch_distribution_entropy": 0.981574292660393, "calibration/buffer_distribution_entropy": 0.9993247562967712, "calibration/confidence_entropy": 0.49599020856948994, "calibration/coverage@0%": 0.001171875, "calibration/coverage@1%": 0.001171875, "calibration/coverage@10%": 0.0046875, "calibration/coverage@15%": 0.0046875, "calibration/coverage@20%": 0.19724651418786693, "calibration/coverage@25%": 0.37394966976516636, "calibration/coverage@30%": 0.4459171660958904, "calibration/coverage@5%": 0.001171875, "calibration/ece": 0.14921269561400377, "calibration/mean_confidence": 0.5074699378823706, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000390625, "completions/max_length": 550.8, "completions/max_terminated_length": 550.8, "completions/mean_length": 173.545703125, "completions/mean_terminated_length": 173.6138458251953, "completions/min_length": 24.2, "completions/min_terminated_length": 70.2, "epoch": 0.48, "grad_norm": 0.001252944814041257, "learning_rate": 1e-06, "loss": -0.0002, "num_tokens": 496117787.0, "reward": 0.9171468496322632, "reward_std": 0.10072523653507233, "rewards/accuracy_reward": 0.52587890625, "rewards/brier_reward": 0.7633208990097046, "rewards/confidence_uniqueness_reward": 0.958120334148407, "rewards/format_reward": 0.99951171875, "rewards/frontier_aurc_reward": -0.0032815839163959025, "rewards/frontier_ece_reward": 0.010945961438119411, "rewards/frontier_entropy_batch_reward": -0.1874622732400894, "rewards/volume_coverage_0": 5.831987115456627e-10, "rewards/volume_coverage_1": 5.831987115456627e-10, "rewards/volume_coverage_10": 1.4444269114122221e-09, "rewards/volume_coverage_15": 3.3241117414206656e-09, "rewards/volume_coverage_20": 5.639911000443476e-08, "rewards/volume_coverage_25": 4.1795596104066137e-07, "rewards/volume_coverage_5": 5.831987115456627e-10, "signal/accuracy_reward/centered_abs_mean": 0.114166259765625, "signal/accuracy_reward/group_std_mean": 0.15151501297950745, "signal/accuracy_reward/group_zero_std_frac": 0.559375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0570831298828125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0570831298828125, "signal/advantage_abs_mean": 0.07722624242305756, "signal/advantage_pre_scale_abs_mean": 0.07722624242305756, "signal/advantage_pre_scale_std": 0.12476578801870346, "signal/advantage_std": 0.12476578801870346, "signal/brier_reward/centered_abs_mean": 0.16023366451263427, "signal/brier_reward/group_std_mean": 0.2034299075603485, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01602336745709181, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01602336745709181, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012775503285229205, "signal/confidence_uniqueness_reward/group_std_mean": 0.017437696829438208, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012775503797456621, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012775503797456621, "signal/format_reward/centered_abs_mean": 0.000946044921875, "signal/format_reward/group_std_mean": 0.0027621358167380095, "signal/format_reward/group_zero_std_frac": 0.984375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375, "signal/frontier_aurc_reward/centered_abs_mean": 0.003498147753998637, "signal/frontier_aurc_reward/group_std_mean": 0.00594499446451664, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.3726846342906356e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.3726846342906356e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.02257801927626133, "signal/frontier_ece_reward/group_std_mean": 0.029622122645378113, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0022578019183129073, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0022578019183129073, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2693198204040527, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3461536645889282, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02693198397755623, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02693198397755623, "signal/volume_coverage_0/centered_abs_mean": 1.38831864893163e-09, "signal/volume_coverage_0/group_std_mean": 1.7580332856148572e-09, "signal/volume_coverage_0/group_zero_std_frac": 0.975, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 1.388318682238321e-10, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 1.388318682238321e-10, "signal/volume_coverage_1/centered_abs_mean": 1.38831864893163e-09, "signal/volume_coverage_1/group_std_mean": 1.7580332856148572e-09, "signal/volume_coverage_1/group_zero_std_frac": 0.975, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 1.388318682238321e-10, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 1.388318682238321e-10, "signal/volume_coverage_10/centered_abs_mean": 3.6237949530004697e-09, "signal/volume_coverage_10/group_std_mean": 4.640102768682653e-09, "signal/volume_coverage_10/group_zero_std_frac": 0.85, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 3.6237949654904787e-10, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 3.6237949654904787e-10, "signal/volume_coverage_15/centered_abs_mean": 8.293814324211724e-09, "signal/volume_coverage_15/group_std_mean": 1.0603659017505151e-08, "signal/volume_coverage_15/group_zero_std_frac": 0.671875, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 8.293814079962658e-10, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 8.293814079962658e-10, "signal/volume_coverage_20/centered_abs_mean": 9.955687936269442e-08, "signal/volume_coverage_20/group_std_mean": 1.2609622110915097e-07, "signal/volume_coverage_20/group_zero_std_frac": 0.421875, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 9.955688029528175e-09, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 9.955688029528175e-09, "signal/volume_coverage_25/centered_abs_mean": 8.92666632523742e-07, "signal/volume_coverage_25/group_std_mean": 1.1310221950111554e-06, "signal/volume_coverage_25/group_zero_std_frac": 0.2625, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 8.926666517083959e-08, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 8.926666517083959e-08, "signal/volume_coverage_5/centered_abs_mean": 1.38831864893163e-09, "signal/volume_coverage_5/group_std_mean": 1.7580332856148572e-09, "signal/volume_coverage_5/group_zero_std_frac": 0.975, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 1.388318682238321e-10, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 1.388318682238321e-10, "step": 150 }, { "epoch": 0.48, "eval_calibration/aurc": 0.467700578320845, "eval_calibration/batch_distribution_entropy": 0.9465067652345415, "eval_calibration/buffer_distribution_entropy": 0.9993365061314878, "eval_calibration/confidence_entropy": 0.4856897573337645, "eval_calibration/coverage@0%": 0.0234375, "eval_calibration/coverage@1%": 0.0234375, "eval_calibration/coverage@10%": 0.0234375, "eval_calibration/coverage@15%": 0.09375, "eval_calibration/coverage@20%": 0.2109375, "eval_calibration/coverage@25%": 0.296875, "eval_calibration/coverage@30%": 0.3125, "eval_calibration/coverage@5%": 0.0234375, "eval_calibration/ece": 0.24495353732649747, "eval_calibration/mean_confidence": 0.49381096837038924, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 344.5, "eval_completions/max_terminated_length": 344.5, "eval_completions/mean_length": 177.96430587768555, "eval_completions/mean_terminated_length": 177.96430587768555, "eval_completions/min_length": 97.25, "eval_completions/min_terminated_length": 97.25, "eval_loss": 0.0, "eval_num_tokens": 496117787.0, "eval_reward": 0.7738081067800522, "eval_reward_std": 0.25044039636850357, "eval_rewards/accuracy_reward": 0.408203125, "eval_rewards/brier_reward": 0.7795219719409943, "eval_rewards/confidence_uniqueness_reward": 0.90625, "eval_rewards/format_reward": 1.0, "eval_rewards/frontier_aurc_reward": -0.00398009066702798, "eval_rewards/frontier_ece_reward": 0.011790585471317172, "eval_rewards/frontier_entropy_batch_reward": -1.0, "eval_rewards/volume_coverage_0": 1.088547529892736e-09, "eval_rewards/volume_coverage_1": 1.088547529892736e-09, "eval_rewards/volume_coverage_10": 3.2117084824978548e-09, "eval_rewards/volume_coverage_15": 4.17547902503701e-09, "eval_rewards/volume_coverage_20": 4.32811551043244e-08, "eval_rewards/volume_coverage_25": 4.155160429064608e-07, "eval_rewards/volume_coverage_5": 1.088547529892736e-09, "eval_runtime": 18.8068, "eval_samples_per_second": 26.586, "eval_signal/accuracy_reward/centered_abs_mean": 0.4661865234375, "eval_signal/accuracy_reward/group_std_mean": 0.49009719491004944, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23309326171875, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23309326171875, "eval_signal/advantage_abs_mean": 0.23672707751393318, "eval_signal/advantage_pre_scale_abs_mean": 0.23672707751393318, "eval_signal/advantage_pre_scale_std": 0.24762531742453575, "eval_signal/advantage_std": 0.24762531742453575, "eval_signal/brier_reward/centered_abs_mean": 0.19656691700220108, "eval_signal/brier_reward/group_std_mean": 0.24666643142700195, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019656691700220108, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.019656691700220108, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0359039306640625, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.042795900255441666, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0035903931129723787, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0035903931129723787, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.005310411681421101, "eval_signal/frontier_aurc_reward/group_std_mean": 0.00943018146790564, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.638014838244999e-05, "eval_signal/frontier_aurc_reward/weight": 0.012500000186264515, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.638014838244999e-05, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.02532364521175623, "eval_signal/frontier_ece_reward/group_std_mean": 0.035478693433105946, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0025323645095340908, "eval_signal/frontier_ece_reward/weight": 0.10000000149011612, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0025323645095340908, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0, "eval_signal/volume_coverage_0/centered_abs_mean": 2.4198223147475773e-09, "eval_signal/volume_coverage_0/group_std_mean": 3.0092013880711477e-09, "eval_signal/volume_coverage_0/group_zero_std_frac": 0.9375, "eval_signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 2.419822398014304e-10, "eval_signal/volume_coverage_0/weight": 0.10000000149011612, "eval_signal/volume_coverage_0/weighted_centered_abs_mean": 2.419822398014304e-10, "eval_signal/volume_coverage_1/centered_abs_mean": 2.4198223147475773e-09, "eval_signal/volume_coverage_1/group_std_mean": 3.0092013880711477e-09, "eval_signal/volume_coverage_1/group_zero_std_frac": 0.9375, "eval_signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 2.419822398014304e-10, "eval_signal/volume_coverage_1/weight": 0.10000000149011612, "eval_signal/volume_coverage_1/weighted_centered_abs_mean": 2.419822398014304e-10, "eval_signal/volume_coverage_10/centered_abs_mean": 7.609845725786712e-09, "eval_signal/volume_coverage_10/group_std_mean": 9.562367619952994e-09, "eval_signal/volume_coverage_10/group_zero_std_frac": 0.6875, "eval_signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 7.609846051914726e-10, "eval_signal/volume_coverage_10/weight": 0.10000000149011612, "eval_signal/volume_coverage_10/weighted_centered_abs_mean": 7.609846051914726e-10, "eval_signal/volume_coverage_15/centered_abs_mean": 1.0576835796083373e-08, "eval_signal/volume_coverage_15/group_std_mean": 1.3268543486155693e-08, "eval_signal/volume_coverage_15/group_zero_std_frac": 0.625, "eval_signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 1.057683600425019e-09, "eval_signal/volume_coverage_15/weight": 0.10000000149011612, "eval_signal/volume_coverage_15/weighted_centered_abs_mean": 1.057683600425019e-09, "eval_signal/volume_coverage_20/centered_abs_mean": 6.025526255726277e-08, "eval_signal/volume_coverage_20/group_std_mean": 7.222980880072782e-08, "eval_signal/volume_coverage_20/group_zero_std_frac": 0.5, "eval_signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 6.025526522179803e-09, "eval_signal/volume_coverage_20/weight": 0.10000000149011612, "eval_signal/volume_coverage_20/weighted_centered_abs_mean": 6.025526522179803e-09, "eval_signal/volume_coverage_25/centered_abs_mean": 5.649726659839871e-07, "eval_signal/volume_coverage_25/group_std_mean": 6.848031688377887e-07, "eval_signal/volume_coverage_25/group_zero_std_frac": 0.4375, "eval_signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 5.6497268641209075e-08, "eval_signal/volume_coverage_25/weight": 0.10000000149011612, "eval_signal/volume_coverage_25/weighted_centered_abs_mean": 5.6497268641209075e-08, "eval_signal/volume_coverage_5/centered_abs_mean": 2.4198223147475773e-09, "eval_signal/volume_coverage_5/group_std_mean": 3.0092013880711477e-09, "eval_signal/volume_coverage_5/group_zero_std_frac": 0.9375, "eval_signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 2.419822398014304e-10, "eval_signal/volume_coverage_5/weight": 0.10000000149011612, "eval_signal/volume_coverage_5/weighted_centered_abs_mean": 2.419822398014304e-10, "eval_steps_per_second": 0.213, "step": 150 }, { "calibration/aurc": 0.4152612608407611, "calibration/batch_distribution_entropy": 0.9864104103777949, "calibration/buffer_distribution_entropy": 0.999336643054435, "calibration/confidence_entropy": 0.49038454056488645, "calibration/coverage@0%": 0.00546875, "calibration/coverage@1%": 0.00546875, "calibration/coverage@10%": 0.013671875, "calibration/coverage@15%": 0.123046875, "calibration/coverage@20%": 0.184765625, "calibration/coverage@25%": 0.232421875, "calibration/coverage@30%": 0.28046875, "calibration/coverage@5%": 0.00546875, "calibration/ece": 0.14502903763292493, "calibration/mean_confidence": 0.5353343759131202, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 468.4, "completions/max_terminated_length": 468.4, "completions/mean_length": 177.9943359375, "completions/mean_terminated_length": 178.02895812988282, "completions/min_length": 49.4, "completions/min_terminated_length": 76.0, "epoch": 0.496, "grad_norm": 0.0010539100039750338, "learning_rate": 1e-06, "loss": -0.0007, "num_tokens": 513248289.0, "reward": 0.9261873483657836, "reward_std": 0.09546184092760086, "rewards/accuracy_reward": 0.5423828125, "rewards/brier_reward": 0.7670300483703614, "rewards/confidence_uniqueness_reward": 0.958674430847168, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.0033624432515352966, "rewards/frontier_ece_reward": 0.010776512883603572, "rewards/frontier_entropy_batch_reward": -0.18512516021728515, "rewards/volume_coverage_0": 5.505965895019615e-10, "rewards/volume_coverage_1": 5.505965895019615e-10, "rewards/volume_coverage_10": 1.0167482748096867e-09, "rewards/volume_coverage_15": 3.6165074934757515e-09, "rewards/volume_coverage_20": 3.7402842067990604e-08, "rewards/volume_coverage_25": 6.125171125859197e-07, "rewards/volume_coverage_5": 5.505965895019615e-10, "signal/accuracy_reward/centered_abs_mean": 0.09814453125, "signal/accuracy_reward/group_std_mean": 0.13675257414579392, "signal/accuracy_reward/group_zero_std_frac": 0.584375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.049072265625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.049072265625, "signal/advantage_abs_mean": 0.07115045487880707, "signal/advantage_pre_scale_abs_mean": 0.07115045487880707, "signal/advantage_pre_scale_std": 0.11864184141159058, "signal/advantage_std": 0.11864184141159058, "signal/brier_reward/centered_abs_mean": 0.1540187805891037, "signal/brier_reward/group_std_mean": 0.19588074684143067, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015401878207921983, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.015401878207921983, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012197751179337501, "signal/confidence_uniqueness_reward/group_std_mean": 0.015673059970140457, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012197751319035887, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012197751319035887, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0037505661603063345, "signal/frontier_aurc_reward/group_std_mean": 0.006236158590763807, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.688207554863766e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.688207554863766e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.020873067528009416, "signal/frontier_ece_reward/group_std_mean": 0.02787470892071724, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0020873067667707803, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0020873067667707803, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27026124596595763, "signal/frontier_entropy_batch_reward/group_std_mean": 0.34854318499565123, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0270261250436306, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0270261250436306, "signal/volume_coverage_0/centered_abs_mean": 7.434120190968896e-10, "signal/volume_coverage_0/group_std_mean": 9.770461585623114e-10, "signal/volume_coverage_0/group_zero_std_frac": 0.975, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 7.434120428626012e-11, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 7.434120428626012e-11, "signal/volume_coverage_1/centered_abs_mean": 7.434120190968896e-10, "signal/volume_coverage_1/group_std_mean": 9.770461585623114e-10, "signal/volume_coverage_1/group_zero_std_frac": 0.975, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 7.434120428626012e-11, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 7.434120428626012e-11, "signal/volume_coverage_10/centered_abs_mean": 2.5940776440108727e-09, "signal/volume_coverage_10/group_std_mean": 3.379073677117761e-09, "signal/volume_coverage_10/group_zero_std_frac": 0.903125, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 2.594077788513338e-10, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 2.594077788513338e-10, "signal/volume_coverage_15/centered_abs_mean": 5.8909916944571705e-09, "signal/volume_coverage_15/group_std_mean": 7.582481664769603e-09, "signal/volume_coverage_15/group_zero_std_frac": 0.778125, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 5.890991722212746e-10, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 5.890991722212746e-10, "signal/volume_coverage_20/centered_abs_mean": 6.449160707688862e-08, "signal/volume_coverage_20/group_std_mean": 8.150530064199302e-08, "signal/volume_coverage_20/group_zero_std_frac": 0.46875, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 6.449160672161725e-09, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 6.449160672161725e-09, "signal/volume_coverage_25/centered_abs_mean": 8.17778817463477e-07, "signal/volume_coverage_25/group_std_mean": 1.035122755865814e-06, "signal/volume_coverage_25/group_zero_std_frac": 0.259375, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 8.177788402008446e-08, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 8.177788402008446e-08, "signal/volume_coverage_5/centered_abs_mean": 7.434120190968896e-10, "signal/volume_coverage_5/group_std_mean": 9.770461585623114e-10, "signal/volume_coverage_5/group_zero_std_frac": 0.975, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 7.434120428626012e-11, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 7.434120428626012e-11, "step": 155 }, { "calibration/aurc": 0.36293904971933433, "calibration/batch_distribution_entropy": 0.9848749541280609, "calibration/buffer_distribution_entropy": 0.9992323219323538, "calibration/confidence_entropy": 0.518724464749172, "calibration/coverage@0%": 0.012519110812133073, "calibration/coverage@1%": 0.012519110812133073, "calibration/coverage@10%": 0.1498570511252446, "calibration/coverage@15%": 0.19367279231898238, "calibration/coverage@20%": 0.26053235078277887, "calibration/coverage@25%": 0.37265625, "calibration/coverage@30%": 0.4265625, "calibration/coverage@5%": 0.08414337695694715, "calibration/ece": 0.15338829376624863, "calibration/mean_confidence": 0.4998068643477344, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 478.2, "completions/max_terminated_length": 478.2, "completions/mean_length": 180.275390625, "completions/mean_terminated_length": 180.32793579101562, "completions/min_length": 69.0, "completions/min_terminated_length": 87.2, "epoch": 0.512, "grad_norm": 0.0010503968223929405, "learning_rate": 1e-06, "loss": -0.0002, "num_tokens": 530239973.0, "reward": 0.9264042139053345, "reward_std": 0.09568149298429489, "rewards/accuracy_reward": 0.5359375, "rewards/brier_reward": 0.78333660364151, "rewards/confidence_uniqueness_reward": 0.9571387529373169, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.0028535844292491676, "rewards/frontier_ece_reward": 0.0107097577303648, "rewards/frontier_entropy_batch_reward": -0.16500933170318605, "rewards/volume_coverage_0": 1.9148062357920993e-09, "rewards/volume_coverage_1": 1.9148062357920993e-09, "rewards/volume_coverage_10": 2.77004001958403e-09, "rewards/volume_coverage_15": 5.0034791865272155e-09, "rewards/volume_coverage_20": 2.125222913207381e-08, "rewards/volume_coverage_25": 4.308101402727971e-07, "rewards/volume_coverage_5": 1.9148062357920993e-09, "signal/accuracy_reward/centered_abs_mean": 0.10535888671875, "signal/accuracy_reward/group_std_mean": 0.14029736816883087, "signal/accuracy_reward/group_zero_std_frac": 0.59375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.052679443359375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.052679443359375, "signal/advantage_abs_mean": 0.07290942072868348, "signal/advantage_pre_scale_abs_mean": 0.07290942072868348, "signal/advantage_pre_scale_std": 0.12146659642457962, "signal/advantage_std": 0.12146659642457962, "signal/brier_reward/centered_abs_mean": 0.14373116195201874, "signal/brier_reward/group_std_mean": 0.18439054489135742, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014373116195201874, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014373116195201874, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.011983883008360863, "signal/confidence_uniqueness_reward/group_std_mean": 0.015894040279090405, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0011983883334323764, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0011983883334323764, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_std_mean": 0.0016572814434766769, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.0032041643280535936, "signal/frontier_aurc_reward/group_std_mean": 0.005314776767045259, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.005205410066992e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.005205410066992e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.018103313446044923, "signal/frontier_ece_reward/group_std_mean": 0.024262651801109314, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001810331386514008, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001810331386514008, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.248811736702919, "signal/frontier_entropy_batch_reward/group_std_mean": 0.32705228924751284, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02488117404282093, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02488117404282093, "signal/volume_coverage_0/centered_abs_mean": 2.6785904161386044e-09, "signal/volume_coverage_0/group_std_mean": 3.432433171024485e-09, "signal/volume_coverage_0/group_zero_std_frac": 0.875, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 2.6785905160586766e-10, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 2.6785905160586766e-10, "signal/volume_coverage_1/centered_abs_mean": 2.6785904161386044e-09, "signal/volume_coverage_1/group_std_mean": 3.432433171024485e-09, "signal/volume_coverage_1/group_zero_std_frac": 0.875, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 2.6785905160586766e-10, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 2.6785905160586766e-10, "signal/volume_coverage_10/centered_abs_mean": 4.61633842263609e-09, "signal/volume_coverage_10/group_std_mean": 5.933983215911099e-09, "signal/volume_coverage_10/group_zero_std_frac": 0.71875, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 4.616338533658393e-10, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 4.616338533658393e-10, "signal/volume_coverage_15/centered_abs_mean": 7.912232824480724e-09, "signal/volume_coverage_15/group_std_mean": 1.0156219509838138e-08, "signal/volume_coverage_15/group_zero_std_frac": 0.63125, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 7.912232757867344e-10, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 7.912232757867344e-10, "signal/volume_coverage_20/centered_abs_mean": 4.049936386252284e-08, "signal/volume_coverage_20/group_std_mean": 5.1828737213099883e-08, "signal/volume_coverage_20/group_zero_std_frac": 0.215625, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 4.049936364047823e-09, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 4.049936364047823e-09, "signal/volume_coverage_25/centered_abs_mean": 7.110435547019733e-07, "signal/volume_coverage_25/group_std_mean": 9.2786052334759e-07, "signal/volume_coverage_25/group_zero_std_frac": 0.009375, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 7.110435440438323e-08, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 7.110435440438323e-08, "signal/volume_coverage_5/centered_abs_mean": 2.6785904161386044e-09, "signal/volume_coverage_5/group_std_mean": 3.432433171024485e-09, "signal/volume_coverage_5/group_zero_std_frac": 0.875, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 2.6785905160586766e-10, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 2.6785905160586766e-10, "step": 160 }, { "calibration/aurc": 0.23877614445444467, "calibration/batch_distribution_entropy": 0.9927720074762381, "calibration/buffer_distribution_entropy": 0.9990736524579434, "calibration/confidence_entropy": 0.5136030382710748, "calibration/coverage@0%": 0.025390625, "calibration/coverage@1%": 0.025390625, "calibration/coverage@10%": 0.174609375, "calibration/coverage@15%": 0.30789658757338556, "calibration/coverage@20%": 0.43612555039138945, "calibration/coverage@25%": 0.5604436766144814, "calibration/coverage@30%": 0.6866812928082192, "calibration/coverage@5%": 0.058984375, "calibration/ece": 0.09971362930713529, "calibration/mean_confidence": 0.521555718243601, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00068359375, "completions/max_length": 693.8, "completions/max_terminated_length": 693.8, "completions/mean_length": 189.43447265625, "completions/mean_terminated_length": 189.56402893066405, "completions/min_length": 16.6, "completions/min_terminated_length": 83.4, "epoch": 0.528, "grad_norm": 0.0013206545263528824, "learning_rate": 1e-06, "loss": 0.0002, "num_tokens": 547209318.0, "reward": 0.9277357578277587, "reward_std": 0.09738266915082931, "rewards/accuracy_reward": 0.5392578125, "rewards/brier_reward": 0.7904103755950928, "rewards/confidence_uniqueness_reward": 0.9558773875236511, "rewards/format_reward": 0.99931640625, "rewards/frontier_aurc_reward": -0.002641553757712245, "rewards/frontier_ece_reward": 0.01046077199280262, "rewards/frontier_entropy_batch_reward": -0.17193247377872467, "rewards/volume_coverage_0": 7.601244911281668e-10, "rewards/volume_coverage_1": 7.601244911281668e-10, "rewards/volume_coverage_10": 1.615653721920296e-09, "rewards/volume_coverage_15": 8.446261243882925e-09, "rewards/volume_coverage_20": 7.996970861623432e-08, "rewards/volume_coverage_25": 7.100671410853465e-07, "rewards/volume_coverage_5": 7.601244911281668e-10, "signal/accuracy_reward/centered_abs_mean": 0.1120849609375, "signal/accuracy_reward/group_std_mean": 0.14580150246620177, "signal/accuracy_reward/group_zero_std_frac": 0.590625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05604248046875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05604248046875, "signal/advantage_abs_mean": 0.07535901814699172, "signal/advantage_pre_scale_abs_mean": 0.07535901814699172, "signal/advantage_pre_scale_std": 0.12386409789323807, "signal/advantage_std": 0.12386409789323807, "signal/brier_reward/centered_abs_mean": 0.14510386288166047, "signal/brier_reward/group_std_mean": 0.1865969717502594, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014510386623442173, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014510386623442173, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012707922607660294, "signal/confidence_uniqueness_reward/group_std_mean": 0.0176456106826663, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012707923073321582, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012707923073321582, "signal/format_reward/centered_abs_mean": 0.001312255859375, "signal/format_reward/group_std_mean": 0.0035306816454976795, "signal/format_reward/group_zero_std_frac": 0.98125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006561279296875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0006561279296875, "signal/frontier_aurc_reward/centered_abs_mean": 0.00314983818680048, "signal/frontier_aurc_reward/group_std_mean": 0.005508489906787872, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.937297806260176e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.937297806260176e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.01709262691438198, "signal/frontier_ece_reward/group_std_mean": 0.022918767482042312, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0017092627473175525, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0017092627473175525, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2518620818853378, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3292876541614532, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02518620789051056, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02518620789051056, "signal/volume_coverage_0/centered_abs_mean": 1.399367510757088e-09, "signal/volume_coverage_0/group_std_mean": 1.810499622445505e-09, "signal/volume_coverage_0/group_zero_std_frac": 0.978125, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 1.3993674587153837e-10, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 1.3993674587153837e-10, "signal/volume_coverage_1/centered_abs_mean": 1.399367510757088e-09, "signal/volume_coverage_1/group_std_mean": 1.810499622445505e-09, "signal/volume_coverage_1/group_zero_std_frac": 0.978125, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 1.3993674587153837e-10, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 1.3993674587153837e-10, "signal/volume_coverage_10/centered_abs_mean": 3.065493392462315e-09, "signal/volume_coverage_10/group_std_mean": 3.9665126294607946e-09, "signal/volume_coverage_10/group_zero_std_frac": 0.86875, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 3.0654934701779266e-10, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 3.0654934701779266e-10, "signal/volume_coverage_15/centered_abs_mean": 1.5146079235250908e-08, "signal/volume_coverage_15/group_std_mean": 1.9439614362681823e-08, "signal/volume_coverage_15/group_zero_std_frac": 0.6, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 1.5146079890282493e-09, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 1.5146079890282493e-09, "signal/volume_coverage_20/centered_abs_mean": 1.3799506817235852e-07, "signal/volume_coverage_20/group_std_mean": 1.7752451881847263e-07, "signal/volume_coverage_20/group_zero_std_frac": 0.115625, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 1.3799507359024688e-08, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 1.3799507359024688e-08, "signal/volume_coverage_25/centered_abs_mean": 1.6232085556566743e-06, "signal/volume_coverage_25/group_std_mean": 2.090263268428316e-06, "signal/volume_coverage_25/group_zero_std_frac": 0.003125, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 1.6232085329193068e-07, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 1.6232085329193068e-07, "signal/volume_coverage_5/centered_abs_mean": 1.399367510757088e-09, "signal/volume_coverage_5/group_std_mean": 1.810499622445505e-09, "signal/volume_coverage_5/group_zero_std_frac": 0.978125, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 1.3993674587153837e-10, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 1.3993674587153837e-10, "step": 165 }, { "calibration/aurc": 0.284975488615337, "calibration/batch_distribution_entropy": 0.981906659826009, "calibration/buffer_distribution_entropy": 0.9989556933707497, "calibration/confidence_entropy": 0.4936425820133298, "calibration/coverage@0%": 0.000390625, "calibration/coverage@1%": 0.000390625, "calibration/coverage@10%": 0.000390625, "calibration/coverage@15%": 0.00546875, "calibration/coverage@20%": 0.27891083659491195, "calibration/coverage@25%": 0.5182225415851273, "calibration/coverage@30%": 0.625707864481409, "calibration/coverage@5%": 0.000390625, "calibration/ece": 0.10303824341155698, "calibration/mean_confidence": 0.5469045699978488, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000390625, "completions/max_length": 664.6, "completions/max_terminated_length": 664.6, "completions/mean_length": 193.5, "completions/mean_terminated_length": 193.5752746582031, "completions/min_length": 33.4, "completions/min_terminated_length": 78.6, "epoch": 0.544, "grad_norm": 0.0013264644658192992, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 564354342.0, "reward": 0.9393848657608033, "reward_std": 0.10854032784700393, "rewards/accuracy_reward": 0.57236328125, "rewards/brier_reward": 0.7735715985298157, "rewards/confidence_uniqueness_reward": 0.9557937264442444, "rewards/format_reward": 0.999609375, "rewards/frontier_aurc_reward": -0.003230233443900943, "rewards/frontier_ece_reward": 0.008333365246653558, "rewards/frontier_entropy_batch_reward": -0.20332232415676116, "rewards/volume_coverage_0": 3.671275700112275e-10, "rewards/volume_coverage_1": 3.671275700112275e-10, "rewards/volume_coverage_10": 1.0478441445083141e-08, "rewards/volume_coverage_15": 2.614181688898043e-08, "rewards/volume_coverage_20": 1.0166868236183291e-07, "rewards/volume_coverage_25": 1.2672042976191733e-05, "rewards/volume_coverage_5": 3.671275700112275e-10, "signal/accuracy_reward/centered_abs_mean": 0.127337646484375, "signal/accuracy_reward/group_std_mean": 0.16760770380496978, "signal/accuracy_reward/group_zero_std_frac": 0.525, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0636688232421875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0636688232421875, "signal/advantage_abs_mean": 0.08374463021755219, "signal/advantage_pre_scale_abs_mean": 0.08374463021755219, "signal/advantage_pre_scale_std": 0.13230954706668854, "signal/advantage_std": 0.13230954706668854, "signal/brier_reward/centered_abs_mean": 0.1559443324804306, "signal/brier_reward/group_std_mean": 0.19659957587718963, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015594434179365634, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.015594434179365634, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.01252935416996479, "signal/confidence_uniqueness_reward/group_std_mean": 0.016918303444981575, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001252935454249382, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001252935454249382, "signal/format_reward/centered_abs_mean": 0.0007568359375, "signal/format_reward/group_std_mean": 0.0022097086533904076, "signal/format_reward/group_zero_std_frac": 0.9875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00037841796875, "signal/frontier_aurc_reward/centered_abs_mean": 0.0038732617627829312, "signal/frontier_aurc_reward/group_std_mean": 0.006673902738839388, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.841577319893986e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.841577319893986e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.01696496121585369, "signal/frontier_ece_reward/group_std_mean": 0.022660358622670174, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0016964962240308523, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0016964962240308523, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.28068632185459136, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3599223792552948, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.028068631887435913, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.028068631887435913, "signal/volume_coverage_0/centered_abs_mean": 1.9625665315103903e-09, "signal/volume_coverage_0/group_std_mean": 2.518391584160895e-09, "signal/volume_coverage_0/group_zero_std_frac": 0.953125, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 1.9625665537148507e-10, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 1.9625665537148507e-10, "signal/volume_coverage_1/centered_abs_mean": 1.9625665315103903e-09, "signal/volume_coverage_1/group_std_mean": 2.518391584160895e-09, "signal/volume_coverage_1/group_zero_std_frac": 0.953125, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 1.9625665537148507e-10, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 1.9625665537148507e-10, "signal/volume_coverage_10/centered_abs_mean": 2.6223874804998103e-08, "signal/volume_coverage_10/group_std_mean": 3.3586331760204755e-08, "signal/volume_coverage_10/group_zero_std_frac": 0.6875, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 2.6223874849407027e-09, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 2.6223874849407027e-09, "signal/volume_coverage_15/centered_abs_mean": 6.304688220382104e-08, "signal/volume_coverage_15/group_std_mean": 8.066067636036678e-08, "signal/volume_coverage_15/group_zero_std_frac": 0.5625, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 6.304688282554594e-09, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 6.304688282554594e-09, "signal/volume_coverage_20/centered_abs_mean": 2.755489987293913e-07, "signal/volume_coverage_20/group_std_mean": 3.5120059465043597e-07, "signal/volume_coverage_20/group_zero_std_frac": 0.1125, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 2.7554899517667764e-08, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 2.7554899517667764e-08, "signal/volume_coverage_25/centered_abs_mean": 2.7111944245916674e-05, "signal/volume_coverage_25/group_std_mean": 3.509574339659594e-05, "signal/volume_coverage_25/group_zero_std_frac": 0.0, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 2.711194588300714e-06, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 2.711194588300714e-06, "signal/volume_coverage_5/centered_abs_mean": 1.9625665315103903e-09, "signal/volume_coverage_5/group_std_mean": 2.518391584160895e-09, "signal/volume_coverage_5/group_zero_std_frac": 0.953125, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 1.9625665537148507e-10, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 1.9625665537148507e-10, "step": 170 }, { "calibration/aurc": 0.28625889515309394, "calibration/batch_distribution_entropy": 0.9849145550016477, "calibration/buffer_distribution_entropy": 0.9989275472363378, "calibration/confidence_entropy": 0.5021353698713945, "calibration/coverage@0%": 0.00625, "calibration/coverage@1%": 0.00625, "calibration/coverage@10%": 0.22385946673189822, "calibration/coverage@15%": 0.2813050391389432, "calibration/coverage@20%": 0.32352158757338556, "calibration/coverage@25%": 0.41536127079256363, "calibration/coverage@30%": 0.5037388392857143, "calibration/coverage@5%": 0.129296875, "calibration/ece": 0.14045019719409385, "calibration/mean_confidence": 0.47354560342460267, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0005859375, "completions/max_length": 542.8, "completions/max_terminated_length": 542.8, "completions/mean_length": 189.28076171875, "completions/mean_terminated_length": 189.39351501464844, "completions/min_length": 31.2, "completions/min_terminated_length": 81.4, "epoch": 0.56, "grad_norm": 0.00103695597499609, "learning_rate": 1e-06, "loss": 0.0002, "num_tokens": 581113985.0, "reward": 0.9196316242218018, "reward_std": 0.08974321335554122, "rewards/accuracy_reward": 0.5251953125, "rewards/brier_reward": 0.7853912115097046, "rewards/confidence_uniqueness_reward": 0.9560101389884949, "rewards/format_reward": 0.99931640625, "rewards/frontier_aurc_reward": -0.002737493207678199, "rewards/frontier_ece_reward": 0.00851745791733265, "rewards/frontier_entropy_batch_reward": -0.1759500414133072, "rewards/volume_coverage_0": 2.1538784755748e-09, "rewards/volume_coverage_1": 2.1538784755748e-09, "rewards/volume_coverage_10": 1.186941149811105e-08, "rewards/volume_coverage_15": 1.8020547543073918e-08, "rewards/volume_coverage_20": 7.627865983295123e-08, "rewards/volume_coverage_25": 0.00013089905551169067, "rewards/volume_coverage_5": 5.395910451788666e-09, "signal/accuracy_reward/centered_abs_mean": 0.09229736328125, "signal/accuracy_reward/group_std_mean": 0.12655276507139207, "signal/accuracy_reward/group_zero_std_frac": 0.615625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.046148681640625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.046148681640625, "signal/advantage_abs_mean": 0.06730285361409187, "signal/advantage_pre_scale_abs_mean": 0.06730285361409187, "signal/advantage_pre_scale_std": 0.11310036927461624, "signal/advantage_std": 0.11310036927461624, "signal/brier_reward/centered_abs_mean": 0.14888992309570312, "signal/brier_reward/group_std_mean": 0.19088106453418732, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01488899253308773, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01488899253308773, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012639607675373555, "signal/confidence_uniqueness_reward/group_std_mean": 0.017911006696522236, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012639608001336455, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012639608001336455, "signal/format_reward/centered_abs_mean": 0.001324462890625, "signal/format_reward/group_std_mean": 0.003866990189999342, "signal/format_reward/group_zero_std_frac": 0.978125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006622314453125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0006622314453125, "signal/frontier_aurc_reward/centered_abs_mean": 0.0029790752567350866, "signal/frontier_aurc_reward/group_std_mean": 0.00507925059646368, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.723844129126519e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.723844129126519e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.014743305556476117, "signal/frontier_ece_reward/group_std_mean": 0.02005004920065403, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0014743305975571275, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0014743305975571275, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.25941252410411836, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3358436286449432, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02594125233590603, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02594125233590603, "signal/volume_coverage_0/centered_abs_mean": 3.6427160843288677e-09, "signal/volume_coverage_0/group_std_mean": 4.646901397009628e-09, "signal/volume_coverage_0/group_zero_std_frac": 0.89375, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 3.6427163507823936e-10, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 3.6427163507823936e-10, "signal/volume_coverage_1/centered_abs_mean": 3.6427160843288677e-09, "signal/volume_coverage_1/group_std_mean": 4.646901397009628e-09, "signal/volume_coverage_1/group_zero_std_frac": 0.89375, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 3.6427163507823936e-10, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 3.6427163507823936e-10, "signal/volume_coverage_10/centered_abs_mean": 1.4844116957846154e-08, "signal/volume_coverage_10/group_std_mean": 1.899510362335377e-08, "signal/volume_coverage_10/group_zero_std_frac": 0.634375, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 1.4844117379730904e-09, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 1.4844117379730904e-09, "signal/volume_coverage_15/centered_abs_mean": 2.3456014375256017e-08, "signal/volume_coverage_15/group_std_mean": 2.994891801222366e-08, "signal/volume_coverage_15/group_zero_std_frac": 0.403125, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 2.345601579634149e-09, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 2.345601579634149e-09, "signal/volume_coverage_20/centered_abs_mean": 1.5019926422610296e-07, "signal/volume_coverage_20/group_std_mean": 1.9201455927486677e-07, "signal/volume_coverage_20/group_zero_std_frac": 0.034375, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 1.501992628050175e-08, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 1.501992628050175e-08, "signal/volume_coverage_25/centered_abs_mean": 0.000258388533256948, "signal/volume_coverage_25/group_std_mean": 0.00032922495738603176, "signal/volume_coverage_25/group_zero_std_frac": 0.0, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 2.5838854526227806e-05, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 2.5838854526227806e-05, "signal/volume_coverage_5/centered_abs_mean": 7.795711720604004e-09, "signal/volume_coverage_5/group_std_mean": 9.911290277386798e-09, "signal/volume_coverage_5/group_zero_std_frac": 0.778125, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 7.795711631786162e-10, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 7.795711631786162e-10, "step": 175 }, { "calibration/aurc": 0.33577627934999876, "calibration/batch_distribution_entropy": 0.9838566498258011, "calibration/buffer_distribution_entropy": 0.99915950902613, "calibration/confidence_entropy": 0.5019535715581608, "calibration/coverage@0%": 0.005880821078431372, "calibration/coverage@1%": 0.005880821078431372, "calibration/coverage@10%": 0.09410232843137255, "calibration/coverage@15%": 0.1340441176470588, "calibration/coverage@20%": 0.23295802696078433, "calibration/coverage@25%": 0.2845726102941176, "calibration/coverage@30%": 0.4221292892156862, "calibration/coverage@5%": 0.04509650735294118, "calibration/ece": 0.10756879220322679, "calibration/mean_confidence": 0.4687018438753654, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00078125, "completions/max_length": 600.8, "completions/max_terminated_length": 600.8, "completions/mean_length": 183.75869140625, "completions/mean_terminated_length": 183.90218505859374, "completions/min_length": 16.4, "completions/min_terminated_length": 83.2, "epoch": 0.576, "grad_norm": 0.0010323330061510205, "learning_rate": 1e-06, "loss": -0.0005, "num_tokens": 598182298.0, "reward": 0.912653386592865, "reward_std": 0.0918677106499672, "rewards/accuracy_reward": 0.51796875, "rewards/brier_reward": 0.7720121502876282, "rewards/confidence_uniqueness_reward": 0.9561181187629699, "rewards/format_reward": 0.99912109375, "rewards/frontier_aurc_reward": -0.00320719201117754, "rewards/frontier_ece_reward": 0.006886793207377196, "rewards/frontier_entropy_batch_reward": -0.19370051622390747, "rewards/volume_coverage_0": 1.4855961287529862e-09, "rewards/volume_coverage_1": 1.4855961287529862e-09, "rewards/volume_coverage_10": 7.633383480865064e-09, "rewards/volume_coverage_15": 8.681704422031088e-09, "rewards/volume_coverage_20": 1.326518031419255e-07, "rewards/volume_coverage_25": 0.00016885874574654736, "rewards/volume_coverage_5": 1.3487472694251324e-09, "signal/accuracy_reward/centered_abs_mean": 0.09427490234375, "signal/accuracy_reward/group_std_mean": 0.13005066514015198, "signal/accuracy_reward/group_zero_std_frac": 0.60625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.047137451171875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.047137451171875, "signal/advantage_abs_mean": 0.06886442601680756, "signal/advantage_pre_scale_abs_mean": 0.06886442601680756, "signal/advantage_pre_scale_std": 0.11540580689907073, "signal/advantage_std": 0.11540580689907073, "signal/brier_reward/centered_abs_mean": 0.15298969745635987, "signal/brier_reward/group_std_mean": 0.196148481965065, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015298970974981785, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.015298970974981785, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.014024058356881142, "signal/confidence_uniqueness_reward/group_std_mean": 0.01963724195957184, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0014024058356881142, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014024058356881142, "signal/format_reward/centered_abs_mean": 0.001678466796875, "signal/format_reward/group_std_mean": 0.004299227613955736, "signal/format_reward/group_zero_std_frac": 0.978125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0008392333984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0008392333984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0034444320946931837, "signal/frontier_aurc_reward/group_std_mean": 0.006053841486573219, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.305540132918395e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.305540132918395e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.013865727372467518, "signal/frontier_ece_reward/group_std_mean": 0.018852605298161507, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001386572769843042, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001386572769843042, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27207915782928466, "signal/frontier_entropy_batch_reward/group_std_mean": 0.349167013168335, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02720791697502136, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02720791697502136, "signal/volume_coverage_0/centered_abs_mean": 2.913903074386326e-09, "signal/volume_coverage_0/group_std_mean": 3.67858619121364e-09, "signal/volume_coverage_0/group_zero_std_frac": 0.8625, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 2.9139030077729444e-10, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 2.9139030077729444e-10, "signal/volume_coverage_1/centered_abs_mean": 2.913903074386326e-09, "signal/volume_coverage_1/group_std_mean": 3.67858619121364e-09, "signal/volume_coverage_1/group_zero_std_frac": 0.8625, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 2.9139030077729444e-10, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 2.9139030077729444e-10, "signal/volume_coverage_10/centered_abs_mean": 1.4660136571365001e-08, "signal/volume_coverage_10/group_std_mean": 1.8583565264407297e-08, "signal/volume_coverage_10/group_zero_std_frac": 0.6625, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 1.4660137548361263e-09, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 1.4660137548361263e-09, "signal/volume_coverage_15/centered_abs_mean": 2.8842995369871006e-08, "signal/volume_coverage_15/group_std_mean": 3.6737129605057814e-08, "signal/volume_coverage_15/group_zero_std_frac": 0.51875, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 2.88429959915959e-09, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 2.88429959915959e-09, "signal/volume_coverage_20/centered_abs_mean": 2.416134393001812e-07, "signal/volume_coverage_20/group_std_mean": 3.0170963327691425e-07, "signal/volume_coverage_20/group_zero_std_frac": 0.0875, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 2.4161344569506583e-08, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 2.4161344569506583e-08, "signal/volume_coverage_25/centered_abs_mean": 0.00029829425329808146, "signal/volume_coverage_25/group_std_mean": 0.00037817141273990276, "signal/volume_coverage_25/group_zero_std_frac": 0.0, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 2.9829425693606025e-05, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 2.9829425693606025e-05, "signal/volume_coverage_5/centered_abs_mean": 4.639043682530541e-09, "signal/volume_coverage_5/group_std_mean": 5.895791987953203e-09, "signal/volume_coverage_5/group_zero_std_frac": 0.825, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 4.6390438157573046e-10, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 4.6390438157573046e-10, "step": 180 }, { "calibration/aurc": 0.32942794396551883, "calibration/batch_distribution_entropy": 0.9829910047287995, "calibration/buffer_distribution_entropy": 0.9992248661659392, "calibration/confidence_entropy": 0.48282500606721623, "calibration/coverage@0%": 0.013679534313725488, "calibration/coverage@1%": 0.013679534313725488, "calibration/coverage@10%": 0.0739813112745098, "calibration/coverage@15%": 0.1698452818627451, "calibration/coverage@20%": 0.35943933823529417, "calibration/coverage@25%": 0.4762928921568627, "calibration/coverage@30%": 0.5509574142156863, "calibration/coverage@5%": 0.014070159313725489, "calibration/ece": 0.1396739366660511, "calibration/mean_confidence": 0.501586015132056, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0005859375, "completions/max_length": 570.0, "completions/max_terminated_length": 570.0, "completions/mean_length": 176.1791015625, "completions/mean_terminated_length": 176.28189086914062, "completions/min_length": 31.8, "completions/min_terminated_length": 75.2, "epoch": 0.592, "grad_norm": 0.0011522769927978516, "learning_rate": 1e-06, "loss": -0.0003, "num_tokens": 615154084.0, "reward": 0.9166985511779785, "reward_std": 0.09232619255781174, "rewards/accuracy_reward": 0.523046875, "rewards/brier_reward": 0.7763656497001648, "rewards/confidence_uniqueness_reward": 0.9593760132789612, "rewards/format_reward": 0.9994140625, "rewards/frontier_aurc_reward": -0.003226546896621585, "rewards/frontier_ece_reward": 0.0073450343683362, "rewards/frontier_entropy_batch_reward": -0.1882669061422348, "rewards/volume_coverage_0": 1.7111413064263558e-09, "rewards/volume_coverage_1": 1.7111413064263558e-09, "rewards/volume_coverage_10": 7.761732589983694e-09, "rewards/volume_coverage_15": 4.0289498137724425e-08, "rewards/volume_coverage_20": 4.315438417279438e-07, "rewards/volume_coverage_25": 0.0002638868114445359, "rewards/volume_coverage_5": 3.0811049600565355e-09, "signal/accuracy_reward/centered_abs_mean": 0.099951171875, "signal/accuracy_reward/group_std_mean": 0.13282042741775513, "signal/accuracy_reward/group_zero_std_frac": 0.615625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0499755859375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0499755859375, "signal/advantage_abs_mean": 0.07076025158166885, "signal/advantage_pre_scale_abs_mean": 0.07076025158166885, "signal/advantage_pre_scale_std": 0.11678868681192398, "signal/advantage_std": 0.11678868681192398, "signal/brier_reward/centered_abs_mean": 0.15273409485816955, "signal/brier_reward/group_std_mean": 0.19397153854370117, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015273409895598888, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.015273409895598888, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012779767252504825, "signal/confidence_uniqueness_reward/group_std_mean": 0.01754718404263258, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012779767625033856, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012779767625033856, "signal/format_reward/centered_abs_mean": 0.001123046875, "signal/format_reward/group_std_mean": 0.0029782545287162067, "signal/format_reward/group_zero_std_frac": 0.984375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0005615234375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0005615234375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0036609724164009096, "signal/frontier_aurc_reward/group_std_mean": 0.006075662653893232, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.576215505949222e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.576215505949222e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.013755329139530658, "signal/frontier_ece_reward/group_std_mean": 0.018514570221304892, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0013755329186096788, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0013755329186096788, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26485961079597475, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3429957151412964, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02648596204817295, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02648596204817295, "signal/volume_coverage_0/centered_abs_mean": 4.143559362290716e-09, "signal/volume_coverage_0/group_std_mean": 5.253447454833804e-09, "signal/volume_coverage_0/group_zero_std_frac": 0.759375, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 4.143559273472874e-10, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 4.143559273472874e-10, "signal/volume_coverage_1/centered_abs_mean": 4.143559362290716e-09, "signal/volume_coverage_1/group_std_mean": 5.253447454833804e-09, "signal/volume_coverage_1/group_zero_std_frac": 0.759375, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 4.143559273472874e-10, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 4.143559273472874e-10, "signal/volume_coverage_10/centered_abs_mean": 1.8865098816434056e-08, "signal/volume_coverage_10/group_std_mean": 2.3865029952219174e-08, "signal/volume_coverage_10/group_zero_std_frac": 0.496875, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 1.8865098816434054e-09, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 1.8865098816434054e-09, "signal/volume_coverage_15/centered_abs_mean": 5.947744803336264e-08, "signal/volume_coverage_15/group_std_mean": 7.626531512983093e-08, "signal/volume_coverage_15/group_zero_std_frac": 0.43125, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 5.947744918799458e-09, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 5.947744918799458e-09, "signal/volume_coverage_20/centered_abs_mean": 6.130203587417781e-07, "signal/volume_coverage_20/group_std_mean": 7.699923173731804e-07, "signal/volume_coverage_20/group_zero_std_frac": 0.109375, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 6.130203438203807e-08, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 6.130203438203807e-08, "signal/volume_coverage_25/centered_abs_mean": 0.0005171356489881874, "signal/volume_coverage_25/group_std_mean": 0.0006737531046383082, "signal/volume_coverage_25/group_zero_std_frac": 0.0, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 5.171356460778043e-05, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 5.171356460778043e-05, "signal/volume_coverage_5/centered_abs_mean": 7.04651075267293e-09, "signal/volume_coverage_5/group_std_mean": 8.822214336845491e-09, "signal/volume_coverage_5/group_zero_std_frac": 0.721875, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 7.046511152353219e-10, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 7.046511152353219e-10, "step": 185 }, { "calibration/aurc": 0.25322455275355066, "calibration/batch_distribution_entropy": 0.9823324911562663, "calibration/buffer_distribution_entropy": 0.9992289443404798, "calibration/confidence_entropy": 0.4846762651810278, "calibration/coverage@0%": 0.011338071615632554, "calibration/coverage@1%": 0.011338071615632554, "calibration/coverage@10%": 0.0953729980862208, "calibration/coverage@15%": 0.32919649452246647, "calibration/coverage@20%": 0.48641912544127236, "calibration/coverage@25%": 0.5775105431727485, "calibration/coverage@30%": 0.684254498460343, "calibration/coverage@5%": 0.039463071615632554, "calibration/ece": 0.09481818543349048, "calibration/mean_confidence": 0.5019350730358498, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0005859375, "completions/max_length": 602.4, "completions/max_terminated_length": 602.4, "completions/mean_length": 175.944140625, "completions/mean_terminated_length": 176.04876098632812, "completions/min_length": 45.6, "completions/min_terminated_length": 75.4, "epoch": 0.608, "grad_norm": 0.0010520720388740301, "learning_rate": 1e-06, "loss": -0.0004, "num_tokens": 631955240.0, "reward": 0.921320378780365, "reward_std": 0.09033734798431396, "rewards/accuracy_reward": 0.5259765625, "rewards/brier_reward": 0.7994609236717224, "rewards/confidence_uniqueness_reward": 0.9615864753723145, "rewards/format_reward": 0.99921875, "rewards/frontier_aurc_reward": -0.002751393895596266, "rewards/frontier_ece_reward": 0.008317224588245154, "rewards/frontier_entropy_batch_reward": -0.18277242481708528, "rewards/volume_coverage_0": 1.578650821176808e-09, "rewards/volume_coverage_1": 1.578650821176808e-09, "rewards/volume_coverage_10": 1.3228030226741794e-08, "rewards/volume_coverage_15": 3.540733644058491e-08, "rewards/volume_coverage_20": 8.138225325637905e-07, "rewards/volume_coverage_25": 0.0009779959451407193, "rewards/volume_coverage_5": 2.3707905971726007e-09, "signal/accuracy_reward/centered_abs_mean": 0.09971923828125, "signal/accuracy_reward/group_std_mean": 0.12960658967494965, "signal/accuracy_reward/group_zero_std_frac": 0.634375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.049859619140625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.049859619140625, "signal/advantage_abs_mean": 0.06983233988285065, "signal/advantage_pre_scale_abs_mean": 0.06983233988285065, "signal/advantage_pre_scale_std": 0.11588151454925537, "signal/advantage_std": 0.11588151454925537, "signal/brier_reward/centered_abs_mean": 0.1423773616552353, "signal/brier_reward/group_std_mean": 0.18201495707035065, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014237736538052558, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014237736538052558, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012686197459697724, "signal/confidence_uniqueness_reward/group_std_mean": 0.017919574119150638, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012686197878792882, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012686197878792882, "signal/format_reward/centered_abs_mean": 0.0014892578125, "signal/format_reward/group_std_mean": 0.003746800497174263, "signal/format_reward/group_zero_std_frac": 0.98125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00074462890625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00074462890625, "signal/frontier_aurc_reward/centered_abs_mean": 0.0031977016944438217, "signal/frontier_aurc_reward/group_std_mean": 0.005458212643861771, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.99712698708754e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.99712698708754e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.012652770802378654, "signal/frontier_ece_reward/group_std_mean": 0.016831176169216634, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0012652770616114139, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0012652770616114139, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26298512816429137, "signal/frontier_entropy_batch_reward/group_std_mean": 0.33757553100585935, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026298512518405915, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026298512518405915, "signal/volume_coverage_0/centered_abs_mean": 2.830159973044033e-09, "signal/volume_coverage_0/group_std_mean": 3.566805295562858e-09, "signal/volume_coverage_0/group_zero_std_frac": 0.8875, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 2.8301599952484935e-10, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 2.8301599952484935e-10, "signal/volume_coverage_1/centered_abs_mean": 2.830159973044033e-09, "signal/volume_coverage_1/group_std_mean": 3.566805295562858e-09, "signal/volume_coverage_1/group_zero_std_frac": 0.8875, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 2.8301599952484935e-10, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 2.8301599952484935e-10, "signal/volume_coverage_10/centered_abs_mean": 2.096147753150035e-08, "signal/volume_coverage_10/group_std_mean": 2.6520628182424844e-08, "signal/volume_coverage_10/group_zero_std_frac": 0.4375, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 2.0961477664727114e-09, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 2.0961477664727114e-09, "signal/volume_coverage_15/centered_abs_mean": 6.579716114174516e-08, "signal/volume_coverage_15/group_std_mean": 8.336484569326785e-08, "signal/volume_coverage_15/group_zero_std_frac": 0.259375, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 6.579716593790863e-09, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 6.579716593790863e-09, "signal/volume_coverage_20/centered_abs_mean": 1.493381773798319e-06, "signal/volume_coverage_20/group_std_mean": 1.8822723177436274e-06, "signal/volume_coverage_20/group_zero_std_frac": 0.003125, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 1.4933818164308833e-07, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 1.4933818164308833e-07, "signal/volume_coverage_25/centered_abs_mean": 0.0014129023300483822, "signal/volume_coverage_25/group_std_mean": 0.0017993575427681207, "signal/volume_coverage_25/group_zero_std_frac": 0.0, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.00014129024057183415, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 0.00014129024057183415, "signal/volume_coverage_5/centered_abs_mean": 3.845295371007751e-09, "signal/volume_coverage_5/group_std_mean": 4.820825294515885e-09, "signal/volume_coverage_5/group_zero_std_frac": 0.871875, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 3.8452955819501257e-10, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 3.8452955819501257e-10, "step": 190 }, { "calibration/aurc": 0.28957636994436, "calibration/batch_distribution_entropy": 0.9859862395418935, "calibration/buffer_distribution_entropy": 0.9992691344631108, "calibration/confidence_entropy": 0.5273816689513522, "calibration/coverage@0%": 0.0019546538649706457, "calibration/coverage@1%": 0.0019546538649706457, "calibration/coverage@10%": 0.0285691352739726, "calibration/coverage@15%": 0.1536226455479452, "calibration/coverage@20%": 0.27676583904109586, "calibration/coverage@25%": 0.44141465875733854, "calibration/coverage@30%": 0.5814135885518591, "calibration/coverage@5%": 0.0019546538649706457, "calibration/ece": 0.09294006812124718, "calibration/mean_confidence": 0.512454016404094, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000390625, "completions/max_length": 534.0, "completions/max_terminated_length": 534.0, "completions/mean_length": 176.61015625, "completions/mean_terminated_length": 176.6790740966797, "completions/min_length": 12.8, "completions/min_terminated_length": 68.2, "epoch": 0.624, "grad_norm": 0.001361057278700173, "learning_rate": 1e-06, "loss": 0.0001, "num_tokens": 649107632.0, "reward": 0.9198349595069886, "reward_std": 0.09446196258068085, "rewards/accuracy_reward": 0.52666015625, "rewards/brier_reward": 0.7886810898780823, "rewards/confidence_uniqueness_reward": 0.9602108001708984, "rewards/format_reward": 0.99951171875, "rewards/frontier_aurc_reward": -0.003087216429412365, "rewards/frontier_ece_reward": 0.005673312395811081, "rewards/frontier_entropy_batch_reward": -0.18763623535633087, "rewards/volume_coverage_0": 1.0043352427491215e-09, "rewards/volume_coverage_1": 1.0043352427491215e-09, "rewards/volume_coverage_10": 1.4544933479854194e-08, "rewards/volume_coverage_15": 1.0033256554464743e-07, "rewards/volume_coverage_20": 7.805879029376684e-07, "rewards/volume_coverage_25": 0.000946238508913666, "rewards/volume_coverage_5": 1.0043352427491215e-09, "signal/accuracy_reward/centered_abs_mean": 0.101385498046875, "signal/accuracy_reward/group_std_mean": 0.1348143756389618, "signal/accuracy_reward/group_zero_std_frac": 0.609375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0506927490234375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0506927490234375, "signal/advantage_abs_mean": 0.07220707982778549, "signal/advantage_pre_scale_abs_mean": 0.07220707982778549, "signal/advantage_pre_scale_std": 0.11911879479885101, "signal/advantage_std": 0.11911879479885101, "signal/brier_reward/centered_abs_mean": 0.14108724892139435, "signal/brier_reward/group_std_mean": 0.18158984780311585, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01410872545093298, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01410872545093298, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012880866974592209, "signal/confidence_uniqueness_reward/group_std_mean": 0.01754343006759882, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012880866648629307, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012880866648629307, "signal/format_reward/centered_abs_mean": 0.000946044921875, "signal/format_reward/group_std_mean": 0.0027621358167380095, "signal/format_reward/group_zero_std_frac": 0.984375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0033061566296964883, "signal/frontier_aurc_reward/group_std_mean": 0.005856604594737291, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.132695976295508e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.132695976295508e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.010227170027792453, "signal/frontier_ece_reward/group_std_mean": 0.013960633054375648, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010227170423604548, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010227170423604548, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2718630850315094, "signal/frontier_entropy_batch_reward/group_std_mean": 0.35125975012779237, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.027186309918761253, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027186309918761253, "signal/volume_coverage_0/centered_abs_mean": 2.233685147956521e-09, "signal/volume_coverage_0/group_std_mean": 2.8576097932386802e-09, "signal/volume_coverage_0/group_zero_std_frac": 0.95625, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 2.233685250652151e-10, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 2.233685250652151e-10, "signal/volume_coverage_1/centered_abs_mean": 2.233685147956521e-09, "signal/volume_coverage_1/group_std_mean": 2.8576097932386802e-09, "signal/volume_coverage_1/group_zero_std_frac": 0.95625, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 2.233685250652151e-10, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 2.233685250652151e-10, "signal/volume_coverage_10/centered_abs_mean": 2.6452925894204782e-08, "signal/volume_coverage_10/group_std_mean": 3.414551414948619e-08, "signal/volume_coverage_10/group_zero_std_frac": 0.484375, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 2.6452926960018884e-09, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 2.6452926960018884e-09, "signal/volume_coverage_15/centered_abs_mean": 1.6930871389320145e-07, "signal/volume_coverage_15/group_std_mean": 2.1859083005892898e-07, "signal/volume_coverage_15/group_zero_std_frac": 0.296875, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 1.6930871726827944e-08, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 1.6930871726827944e-08, "signal/volume_coverage_20/centered_abs_mean": 1.2653310534460616e-06, "signal/volume_coverage_20/group_std_mean": 1.6244108905993927e-06, "signal/volume_coverage_20/group_zero_std_frac": 0.009375, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 1.2653311110000232e-07, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 1.2653311110000232e-07, "signal/volume_coverage_25/centered_abs_mean": 0.0016905165975913405, "signal/volume_coverage_25/group_std_mean": 0.0022139872424304487, "signal/volume_coverage_25/group_zero_std_frac": 0.0, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.00016905165684875101, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 0.00016905165684875101, "signal/volume_coverage_5/centered_abs_mean": 2.233685147956521e-09, "signal/volume_coverage_5/group_std_mean": 2.8576097932386802e-09, "signal/volume_coverage_5/group_zero_std_frac": 0.95625, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 2.233685250652151e-10, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 2.233685250652151e-10, "step": 195 }, { "calibration/aurc": 0.26891983390554686, "calibration/batch_distribution_entropy": 0.9892751505258353, "calibration/buffer_distribution_entropy": 0.9993554405698306, "calibration/confidence_entropy": 0.525787655235901, "calibration/coverage@0%": 0.009800857843137254, "calibration/coverage@1%": 0.009800857843137254, "calibration/coverage@10%": 0.2555836397058823, "calibration/coverage@15%": 0.3670235906862745, "calibration/coverage@20%": 0.4393504901960784, "calibration/coverage@25%": 0.5050428861613139, "calibration/coverage@30%": 0.5847763270547945, "calibration/coverage@5%": 0.18278492647058825, "calibration/ece": 0.17315416015321752, "calibration/mean_confidence": 0.5182405579510768, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00126953125, "completions/max_length": 815.0, "completions/max_terminated_length": 815.0, "completions/mean_length": 178.43115234375, "completions/mean_terminated_length": 178.65952758789064, "completions/min_length": 29.8, "completions/min_terminated_length": 68.6, "epoch": 0.64, "grad_norm": 0.000993815716356039, "learning_rate": 1e-06, "loss": -0.0005, "num_tokens": 666277455.0, "reward": 0.942539393901825, "reward_std": 0.08385625034570694, "rewards/accuracy_reward": 0.57353515625, "rewards/brier_reward": 0.7822496891021729, "rewards/confidence_uniqueness_reward": 0.9578921079635621, "rewards/format_reward": 0.9986328125, "rewards/frontier_aurc_reward": -0.002722199750132859, "rewards/frontier_ece_reward": 0.005280413199216127, "rewards/frontier_entropy_batch_reward": -0.18149682581424714, "rewards/volume_coverage_0": 5.83049909741451e-10, "rewards/volume_coverage_1": 5.83049909741451e-10, "rewards/volume_coverage_10": 4.8380893513666475e-09, "rewards/volume_coverage_15": 1.38987459408213e-07, "rewards/volume_coverage_20": 2.436090110791156e-06, "rewards/volume_coverage_25": 0.0009665171091910452, "rewards/volume_coverage_5": 5.83049909741451e-10, "signal/accuracy_reward/centered_abs_mean": 0.078619384765625, "signal/accuracy_reward/group_std_mean": 0.10908448547124863, "signal/accuracy_reward/group_zero_std_frac": 0.671875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0393096923828125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0393096923828125, "signal/advantage_abs_mean": 0.06242771372199059, "signal/advantage_pre_scale_abs_mean": 0.06242771372199059, "signal/advantage_pre_scale_std": 0.10776360332965851, "signal/advantage_std": 0.10776360332965851, "signal/brier_reward/centered_abs_mean": 0.13804030120372773, "signal/brier_reward/group_std_mean": 0.17795804738998414, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013804030977189541, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013804030977189541, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.01370534636080265, "signal/confidence_uniqueness_reward/group_std_mean": 0.018724654987454414, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001370534673333168, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001370534673333168, "signal/format_reward/centered_abs_mean": 0.00230712890625, "signal/format_reward/group_std_mean": 0.0046893797349184755, "signal/format_reward/group_zero_std_frac": 0.978125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.001153564453125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.001153564453125, "signal/frontier_aurc_reward/centered_abs_mean": 0.00299143441952765, "signal/frontier_aurc_reward/group_std_mean": 0.005100049264729023, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.7392930607893504e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.7392930607893504e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.009928003698587418, "signal/frontier_ece_reward/group_std_mean": 0.013508135452866554, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009928003652021288, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009928003652021288, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.25994506776332854, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3403321862220764, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02599450834095478, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02599450834095478, "signal/volume_coverage_0/centered_abs_mean": 2.0802533140518166e-09, "signal/volume_coverage_0/group_std_mean": 2.721802350169611e-09, "signal/volume_coverage_0/group_zero_std_frac": 0.90625, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 2.080253476421934e-10, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 2.080253476421934e-10, "signal/volume_coverage_1/centered_abs_mean": 2.0802533140518166e-09, "signal/volume_coverage_1/group_std_mean": 2.721802350169611e-09, "signal/volume_coverage_1/group_zero_std_frac": 0.90625, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 2.080253476421934e-10, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 2.080253476421934e-10, "signal/volume_coverage_10/centered_abs_mean": 1.7485952596985042e-08, "signal/volume_coverage_10/group_std_mean": 2.2394617360532722e-08, "signal/volume_coverage_10/group_zero_std_frac": 0.478125, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 1.7485952463758282e-09, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 1.7485952463758282e-09, "signal/volume_coverage_15/centered_abs_mean": 2.2916896256219843e-07, "signal/volume_coverage_15/group_std_mean": 2.9144772639710936e-07, "signal/volume_coverage_15/group_zero_std_frac": 0.2625, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 2.2916897002289715e-08, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 2.2916897002289715e-08, "signal/volume_coverage_20/centered_abs_mean": 5.1552106924646065e-06, "signal/volume_coverage_20/group_std_mean": 6.58480396396044e-06, "signal/volume_coverage_20/group_zero_std_frac": 0.05, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 5.155210658358556e-07, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 5.155210658358556e-07, "signal/volume_coverage_25/centered_abs_mean": 0.001954718097113073, "signal/volume_coverage_25/group_std_mean": 0.002557957172393799, "signal/volume_coverage_25/group_zero_std_frac": 0.0, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.00019547181436792017, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 0.00019547181436792017, "signal/volume_coverage_5/centered_abs_mean": 2.0802533140518166e-09, "signal/volume_coverage_5/group_std_mean": 2.721802350169611e-09, "signal/volume_coverage_5/group_zero_std_frac": 0.90625, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 2.080253476421934e-10, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 2.080253476421934e-10, "step": 200 }, { "epoch": 0.64, "eval_calibration/aurc": 0.48648042753111, "eval_calibration/batch_distribution_entropy": 0.9412787594127898, "eval_calibration/buffer_distribution_entropy": 0.9993122469934742, "eval_calibration/confidence_entropy": 0.496019188313124, "eval_calibration/coverage@0%": 0.078125, "eval_calibration/coverage@1%": 0.078125, "eval_calibration/coverage@10%": 0.078125, "eval_calibration/coverage@15%": 0.0859375, "eval_calibration/coverage@20%": 0.15625, "eval_calibration/coverage@25%": 0.2109375, "eval_calibration/coverage@30%": 0.2265625, "eval_calibration/coverage@5%": 0.078125, "eval_calibration/ece": 0.21468968819672685, "eval_calibration/mean_confidence": 0.4715669516536699, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 364.5, "eval_completions/max_terminated_length": 364.5, "eval_completions/mean_length": 180.03556060791016, "eval_completions/mean_terminated_length": 180.03556060791016, "eval_completions/min_length": 92.5, "eval_completions/min_terminated_length": 92.5, "eval_loss": 0.0, "eval_num_tokens": 666277455.0, "eval_reward": 0.7850509434938431, "eval_reward_std": 0.2481500282883644, "eval_rewards/accuracy_reward": 0.431640625, "eval_rewards/brier_reward": 0.7836132198572159, "eval_rewards/confidence_uniqueness_reward": 0.901123046875, "eval_rewards/format_reward": 1.0, "eval_rewards/frontier_aurc_reward": -0.003661160182673484, "eval_rewards/frontier_ece_reward": 0.00562399672344327, "eval_rewards/frontier_entropy_batch_reward": -1.0, "eval_rewards/volume_coverage_0": 3.885341458287339e-10, "eval_rewards/volume_coverage_1": 3.885341458287339e-10, "eval_rewards/volume_coverage_10": 5.619555665248299e-08, "eval_rewards/volume_coverage_15": 4.643091244815878e-07, "eval_rewards/volume_coverage_20": 1.3747317098022904e-05, "eval_rewards/volume_coverage_25": 0.0023892930475994945, "eval_rewards/volume_coverage_5": 6.120992632890898e-09, "eval_runtime": 19.7879, "eval_samples_per_second": 25.268, "eval_signal/accuracy_reward/centered_abs_mean": 0.4705810546875, "eval_signal/accuracy_reward/group_std_mean": 0.4921695739030838, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23529052734375, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23529052734375, "eval_signal/advantage_abs_mean": 0.23600686714053154, "eval_signal/advantage_pre_scale_abs_mean": 0.23600686714053154, "eval_signal/advantage_pre_scale_std": 0.24546074494719505, "eval_signal/advantage_std": 0.24546074494719505, "eval_signal/brier_reward/centered_abs_mean": 0.18876836448907852, "eval_signal/brier_reward/group_std_mean": 0.23586371541023254, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01887683616951108, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.01887683616951108, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0405426025390625, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.04792775306850672, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004054260440170765, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004054260440170765, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.004725108272396028, "eval_signal/frontier_aurc_reward/group_std_mean": 0.008816197630949318, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.9063855587737635e-05, "eval_signal/frontier_aurc_reward/weight": 0.012500000186264515, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.9063855587737635e-05, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.011614769231528044, "eval_signal/frontier_ece_reward/group_std_mean": 0.01629130309447646, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0011614770046435297, "eval_signal/frontier_ece_reward/weight": 0.10000000149011612, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0011614770046435297, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0, "eval_signal/volume_coverage_0/centered_abs_mean": 8.46114164843037e-10, "eval_signal/volume_coverage_0/group_std_mean": 1.0500743954922065e-09, "eval_signal/volume_coverage_0/group_zero_std_frac": 1.0, "eval_signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 8.4611418132291e-11, "eval_signal/volume_coverage_0/weight": 0.10000000149011612, "eval_signal/volume_coverage_0/weighted_centered_abs_mean": 8.4611418132291e-11, "eval_signal/volume_coverage_1/centered_abs_mean": 8.46114164843037e-10, "eval_signal/volume_coverage_1/group_std_mean": 1.0500743954922065e-09, "eval_signal/volume_coverage_1/group_zero_std_frac": 1.0, "eval_signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 8.4611418132291e-11, "eval_signal/volume_coverage_1/weight": 0.10000000149011612, "eval_signal/volume_coverage_1/weighted_centered_abs_mean": 8.4611418132291e-11, "eval_signal/volume_coverage_10/centered_abs_mean": 1.3369631712834007e-07, "eval_signal/volume_coverage_10/group_std_mean": 1.6589476459216712e-07, "eval_signal/volume_coverage_10/group_zero_std_frac": 0.4375, "eval_signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 1.3369631957083072e-08, "eval_signal/volume_coverage_10/weight": 0.10000000149011612, "eval_signal/volume_coverage_10/weighted_centered_abs_mean": 1.3369631957083072e-08, "eval_signal/volume_coverage_15/centered_abs_mean": 8.858086886220917e-07, "eval_signal/volume_coverage_15/group_std_mean": 1.1126764576374626e-06, "eval_signal/volume_coverage_15/group_zero_std_frac": 0.1875, "eval_signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 8.858087241492285e-08, "eval_signal/volume_coverage_15/weight": 0.10000000149011612, "eval_signal/volume_coverage_15/weighted_centered_abs_mean": 8.858087241492285e-08, "eval_signal/volume_coverage_20/centered_abs_mean": 2.2470571821031626e-05, "eval_signal/volume_coverage_20/group_std_mean": 2.80036192634725e-05, "eval_signal/volume_coverage_20/group_zero_std_frac": 0.125, "eval_signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 2.247057153681453e-06, "eval_signal/volume_coverage_20/weight": 0.10000000149011612, "eval_signal/volume_coverage_20/weighted_centered_abs_mean": 2.247057153681453e-06, "eval_signal/volume_coverage_25/centered_abs_mean": 0.004138121090363711, "eval_signal/volume_coverage_25/group_std_mean": 0.005272858194075525, "eval_signal/volume_coverage_25/group_zero_std_frac": 0.0, "eval_signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.00041381209302926436, "eval_signal/volume_coverage_25/weight": 0.10000000149011612, "eval_signal/volume_coverage_25/weighted_centered_abs_mean": 0.00041381209302926436, "eval_signal/volume_coverage_5/centered_abs_mean": 2.835196968864473e-08, "eval_signal/volume_coverage_5/group_std_mean": 3.607782748243915e-08, "eval_signal/volume_coverage_5/group_zero_std_frac": 0.875, "eval_signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 2.8351969587163406e-09, "eval_signal/volume_coverage_5/weight": 0.10000000149011612, "eval_signal/volume_coverage_5/weighted_centered_abs_mean": 2.8351969587163406e-09, "eval_steps_per_second": 0.202, "step": 200 }, { "calibration/aurc": 0.4352268779511471, "calibration/batch_distribution_entropy": 0.9830507741058137, "calibration/buffer_distribution_entropy": 0.9993089567781432, "calibration/confidence_entropy": 0.5284715032781411, "calibration/coverage@0%": 0.008203125, "calibration/coverage@1%": 0.008203125, "calibration/coverage@10%": 0.008203125, "calibration/coverage@15%": 0.01171875, "calibration/coverage@20%": 0.01484375, "calibration/coverage@25%": 0.07109375, "calibration/coverage@30%": 0.205078125, "calibration/coverage@5%": 0.008203125, "calibration/ece": 0.11390982277395512, "calibration/mean_confidence": 0.480846441143625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 488.0, "completions/max_terminated_length": 488.0, "completions/mean_length": 179.42890625, "completions/mean_terminated_length": 179.46363220214843, "completions/min_length": 57.6, "completions/min_terminated_length": 73.4, "epoch": 0.656, "grad_norm": 0.0011551964562386274, "learning_rate": 1e-06, "loss": 0.0001, "num_tokens": 682971351.0, "reward": 0.9070303916931153, "reward_std": 0.09220799803733826, "rewards/accuracy_reward": 0.5052734375, "rewards/brier_reward": 0.7661949157714844, "rewards/confidence_uniqueness_reward": 0.9564722537994385, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.003637386718764901, "rewards/frontier_ece_reward": 0.0046428360510617495, "rewards/frontier_entropy_batch_reward": -0.18354730010032655, "rewards/volume_coverage_0": 1.1100982744149945e-09, "rewards/volume_coverage_1": 1.1100982744149945e-09, "rewards/volume_coverage_10": 4.797209829376925e-09, "rewards/volume_coverage_15": 1.8364521581215598e-07, "rewards/volume_coverage_20": 8.764892163526383e-06, "rewards/volume_coverage_25": 0.0015963076613843441, "rewards/volume_coverage_5": 1.0403670203107397e-09, "signal/accuracy_reward/centered_abs_mean": 0.097314453125, "signal/accuracy_reward/group_std_mean": 0.13193988651037217, "signal/accuracy_reward/group_zero_std_frac": 0.609375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0486572265625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0486572265625, "signal/advantage_abs_mean": 0.07022999972105026, "signal/advantage_pre_scale_abs_mean": 0.07022999972105026, "signal/advantage_pre_scale_std": 0.11693431288003922, "signal/advantage_std": 0.11693431288003922, "signal/brier_reward/centered_abs_mean": 0.1503700226545334, "signal/brier_reward/group_std_mean": 0.19127426743507386, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015037002786993981, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.015037002786993981, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012323895655572414, "signal/confidence_uniqueness_reward/group_std_mean": 0.016047840379178523, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001232389616779983, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001232389616779983, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0036699477583169936, "signal/frontier_aurc_reward/group_std_mean": 0.006296676304191351, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.587434959830716e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.587434959830716e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.009865659102797509, "signal/frontier_ece_reward/group_std_mean": 0.013996114954352379, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00098656591726467, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00098656591726467, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2666574031114578, "signal/frontier_entropy_batch_reward/group_std_mean": 0.34437201619148256, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026665739342570306, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026665739342570306, "signal/volume_coverage_0/centered_abs_mean": 3.058716169235254e-09, "signal/volume_coverage_0/group_std_mean": 3.978690621408987e-09, "signal/volume_coverage_0/group_zero_std_frac": 0.871875, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 3.058716224746405e-10, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 3.058716224746405e-10, "signal/volume_coverage_1/centered_abs_mean": 3.058716169235254e-09, "signal/volume_coverage_1/group_std_mean": 3.978690621408987e-09, "signal/volume_coverage_1/group_zero_std_frac": 0.871875, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 3.058716224746405e-10, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 3.058716224746405e-10, "signal/volume_coverage_10/centered_abs_mean": 1.4398968239959231e-08, "signal/volume_coverage_10/group_std_mean": 1.8700468018550964e-08, "signal/volume_coverage_10/group_zero_std_frac": 0.575, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 1.4398968462003837e-09, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 1.4398968462003837e-09, "signal/volume_coverage_15/centered_abs_mean": 2.995924269555417e-07, "signal/volume_coverage_15/group_std_mean": 3.894604368781529e-07, "signal/volume_coverage_15/group_zero_std_frac": 0.275, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 2.995924384130433e-08, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 2.995924384130433e-08, "signal/volume_coverage_20/centered_abs_mean": 1.7284190471400505e-05, "signal/volume_coverage_20/group_std_mean": 2.2454723512055354e-05, "signal/volume_coverage_20/group_zero_std_frac": 0.10625, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 1.7284191017097327e-06, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 1.7284191017097327e-06, "signal/volume_coverage_25/centered_abs_mean": 0.0025885120034217835, "signal/volume_coverage_25/group_std_mean": 0.0033385612536221743, "signal/volume_coverage_25/group_zero_std_frac": 0.0, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.00025885119393933567, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 0.00025885119393933567, "signal/volume_coverage_5/centered_abs_mean": 3.518249025447062e-09, "signal/volume_coverage_5/group_std_mean": 4.586072410717179e-09, "signal/volume_coverage_5/group_zero_std_frac": 0.865625, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 3.518249136469365e-10, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 3.518249136469365e-10, "step": 205 }, { "calibration/aurc": 0.32527705960723285, "calibration/batch_distribution_entropy": 0.9799286460754233, "calibration/buffer_distribution_entropy": 0.9991726792626041, "calibration/confidence_entropy": 0.5010712565281534, "calibration/coverage@0%": 0.005086533757338552, "calibration/coverage@1%": 0.005086533757338552, "calibration/coverage@10%": 0.07893835616438356, "calibration/coverage@15%": 0.14262934197651664, "calibration/coverage@20%": 0.2079195205479452, "calibration/coverage@25%": 0.30212818003913894, "calibration/coverage@30%": 0.37953002690802345, "calibration/coverage@5%": 0.005086533757338552, "calibration/ece": 0.14933391241023902, "calibration/mean_confidence": 0.521799676701359, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00068359375, "completions/max_length": 613.4, "completions/max_terminated_length": 613.4, "completions/mean_length": 179.85869140625, "completions/mean_terminated_length": 179.9818878173828, "completions/min_length": 15.8, "completions/min_terminated_length": 76.6, "epoch": 0.672, "grad_norm": 0.0010782018071040511, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 699726544.0, "reward": 0.9170631289482116, "reward_std": 0.08810421824455261, "rewards/accuracy_reward": 0.5203125, "rewards/brier_reward": 0.7815946936607361, "rewards/confidence_uniqueness_reward": 0.9560160875320435, "rewards/format_reward": 0.99931640625, "rewards/frontier_aurc_reward": -0.0033685142174363137, "rewards/frontier_ece_reward": 0.005748637113720178, "rewards/frontier_entropy_batch_reward": -0.17214102149009705, "rewards/volume_coverage_0": 2.913457092246219e-09, "rewards/volume_coverage_1": 2.913457092246219e-09, "rewards/volume_coverage_10": 2.614499892139577e-08, "rewards/volume_coverage_15": 4.295584156821519e-07, "rewards/volume_coverage_20": 1.1378515046089887e-05, "rewards/volume_coverage_25": 0.001677690027281642, "rewards/volume_coverage_5": 9.1560294845916e-09, "signal/accuracy_reward/centered_abs_mean": 0.0943603515625, "signal/accuracy_reward/group_std_mean": 0.12397949695587158, "signal/accuracy_reward/group_zero_std_frac": 0.646875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04718017578125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04718017578125, "signal/advantage_abs_mean": 0.06749112159013748, "signal/advantage_pre_scale_abs_mean": 0.06749112159013748, "signal/advantage_pre_scale_std": 0.11356084197759628, "signal/advantage_std": 0.11356084197759628, "signal/brier_reward/centered_abs_mean": 0.14855161011219026, "signal/brier_reward/group_std_mean": 0.18973374664783477, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014855161309242249, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014855161309242249, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.01256434228271246, "signal/confidence_uniqueness_reward/group_std_mean": 0.018024000525474548, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012564342236146332, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012564342236146332, "signal/format_reward/centered_abs_mean": 0.001324462890625, "signal/format_reward/group_std_mean": 0.0038669900968670845, "signal/format_reward/group_zero_std_frac": 0.978125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006622314453125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0006622314453125, "signal/frontier_aurc_reward/centered_abs_mean": 0.0036973343696445225, "signal/frontier_aurc_reward/group_std_mean": 0.006400900986045599, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.621668122126721e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.621668122126721e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.010354111157357692, "signal/frontier_ece_reward/group_std_mean": 0.013915826939046383, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010354111203923822, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010354111203923822, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.24957755506038665, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3306098520755768, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02495775669813156, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02495775669813156, "signal/volume_coverage_0/centered_abs_mean": 6.486050097009866e-09, "signal/volume_coverage_0/group_std_mean": 8.15934981801547e-09, "signal/volume_coverage_0/group_zero_std_frac": 0.728125, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 6.486050529996845e-10, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 6.486050529996845e-10, "signal/volume_coverage_1/centered_abs_mean": 6.486050097009866e-09, "signal/volume_coverage_1/group_std_mean": 8.15934981801547e-09, "signal/volume_coverage_1/group_zero_std_frac": 0.728125, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 6.486050529996845e-10, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 6.486050529996845e-10, "signal/volume_coverage_10/centered_abs_mean": 4.016737840117912e-08, "signal/volume_coverage_10/group_std_mean": 5.21149178922542e-08, "signal/volume_coverage_10/group_zero_std_frac": 0.64375, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 4.016737653600444e-09, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 4.016737653600444e-09, "signal/volume_coverage_15/centered_abs_mean": 5.667033235567942e-07, "signal/volume_coverage_15/group_std_mean": 7.309852406933714e-07, "signal/volume_coverage_15/group_zero_std_frac": 0.45, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 5.66703334925478e-08, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 5.66703334925478e-08, "signal/volume_coverage_20/centered_abs_mean": 2.5757231196621432e-05, "signal/volume_coverage_20/group_std_mean": 3.245171283197124e-05, "signal/volume_coverage_20/group_zero_std_frac": 0.15625, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 2.5757231924217195e-06, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 2.5757231924217195e-06, "signal/volume_coverage_25/centered_abs_mean": 0.0032109246589243413, "signal/volume_coverage_25/group_std_mean": 0.004136029817163944, "signal/volume_coverage_25/group_zero_std_frac": 0.0, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.00032109246822074054, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 0.00032109246822074054, "signal/volume_coverage_5/centered_abs_mean": 1.5452855794961805e-08, "signal/volume_coverage_5/group_std_mean": 1.9889450797450082e-08, "signal/volume_coverage_5/group_zero_std_frac": 0.70625, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 1.5452855883779647e-09, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 1.5452855883779647e-09, "step": 210 }, { "calibration/aurc": 0.3602656739699735, "calibration/batch_distribution_entropy": 0.9861478235373793, "calibration/buffer_distribution_entropy": 0.9991490683063098, "calibration/confidence_entropy": 0.518639065678005, "calibration/coverage@0%": 0.001171875, "calibration/coverage@1%": 0.001171875, "calibration/coverage@10%": 0.01484375, "calibration/coverage@15%": 0.112890625, "calibration/coverage@20%": 0.191015625, "calibration/coverage@25%": 0.41484375, "calibration/coverage@30%": 0.6168075980392157, "calibration/coverage@5%": 0.001171875, "calibration/ece": 0.15347873798188744, "calibration/mean_confidence": 0.5023286296324256, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000390625, "completions/max_length": 547.2, "completions/max_terminated_length": 547.2, "completions/mean_length": 182.27578125, "completions/mean_terminated_length": 182.3482238769531, "completions/min_length": 30.8, "completions/min_terminated_length": 74.8, "epoch": 0.688, "grad_norm": 0.0012053457321599126, "learning_rate": 1e-06, "loss": 0.0002, "num_tokens": 716546968.0, "reward": 0.9287741422653198, "reward_std": 0.09164600521326065, "rewards/accuracy_reward": 0.545703125, "rewards/brier_reward": 0.7708859205245971, "rewards/confidence_uniqueness_reward": 0.9557458996772766, "rewards/format_reward": 0.999609375, "rewards/frontier_aurc_reward": -0.0031304874457418917, "rewards/frontier_ece_reward": 0.004407191788777709, "rewards/frontier_entropy_batch_reward": -0.17154249548912048, "rewards/volume_coverage_0": 3.516108260104289e-09, "rewards/volume_coverage_1": 3.516108260104289e-09, "rewards/volume_coverage_10": 1.6128615110133637e-08, "rewards/volume_coverage_15": 1.4867726569889327e-07, "rewards/volume_coverage_20": 1.796638493942737e-05, "rewards/volume_coverage_25": 0.0020555023336783053, "rewards/volume_coverage_5": 9.340963502690869e-09, "signal/accuracy_reward/centered_abs_mean": 0.1008544921875, "signal/accuracy_reward/group_std_mean": 0.13470993041992188, "signal/accuracy_reward/group_zero_std_frac": 0.6125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05042724609375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05042724609375, "signal/advantage_abs_mean": 0.0695977509021759, "signal/advantage_pre_scale_abs_mean": 0.0695977509021759, "signal/advantage_pre_scale_std": 0.11726142615079879, "signal/advantage_std": 0.11726142615079879, "signal/brier_reward/centered_abs_mean": 0.1450573042035103, "signal/brier_reward/group_std_mean": 0.18608674705028533, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014505730383098126, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014505730383098126, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012379896081984042, "signal/confidence_uniqueness_reward/group_std_mean": 0.01644364632666111, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012379896361380816, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012379896361380816, "signal/format_reward/centered_abs_mean": 0.00074462890625, "signal/format_reward/group_std_mean": 0.0018734002020210027, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000372314453125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000372314453125, "signal/frontier_aurc_reward/centered_abs_mean": 0.0033159184269607065, "signal/frontier_aurc_reward/group_std_mean": 0.005820685159415006, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.1448980846325866e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.1448980846325866e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.00935722216963768, "signal/frontier_ece_reward/group_std_mean": 0.012687020935118198, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009357222355902195, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009357222355902195, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.25185816884040835, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3297829031944275, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02518581636250019, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02518581636250019, "signal/volume_coverage_0/centered_abs_mean": 5.15811207080219e-09, "signal/volume_coverage_0/group_std_mean": 6.73305793341683e-09, "signal/volume_coverage_0/group_zero_std_frac": 0.75625, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 5.158112159620032e-10, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 5.158112159620032e-10, "signal/volume_coverage_1/centered_abs_mean": 5.15811207080219e-09, "signal/volume_coverage_1/group_std_mean": 6.73305793341683e-09, "signal/volume_coverage_1/group_zero_std_frac": 0.75625, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 5.158112159620032e-10, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 5.158112159620032e-10, "signal/volume_coverage_10/centered_abs_mean": 2.6287222354426375e-08, "signal/volume_coverage_10/group_std_mean": 3.41128814085323e-08, "signal/volume_coverage_10/group_zero_std_frac": 0.653125, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 2.62872226208799e-09, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 2.62872226208799e-09, "signal/volume_coverage_15/centered_abs_mean": 2.7471078709595533e-07, "signal/volume_coverage_15/group_std_mean": 3.5181790281058056e-07, "signal/volume_coverage_15/group_zero_std_frac": 0.3, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 2.7471080343843824e-08, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 2.7471080343843824e-08, "signal/volume_coverage_20/centered_abs_mean": 2.58131702139508e-05, "signal/volume_coverage_20/group_std_mean": 3.3654694198048675e-05, "signal/volume_coverage_20/group_zero_std_frac": 0.05, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 2.5813170623223414e-06, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 2.5813170623223414e-06, "signal/volume_coverage_25/centered_abs_mean": 0.003704306995496154, "signal/volume_coverage_25/group_std_mean": 0.004811380803585052, "signal/volume_coverage_25/group_zero_std_frac": 0.0, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.000370430713519454, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 0.000370430713519454, "signal/volume_coverage_5/centered_abs_mean": 1.2779765867776404e-08, "signal/volume_coverage_5/group_std_mean": 1.6574323069562525e-08, "signal/volume_coverage_5/group_zero_std_frac": 0.73125, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 1.2779766911386047e-09, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 1.2779766911386047e-09, "step": 215 }, { "calibration/aurc": 0.25712697833674014, "calibration/batch_distribution_entropy": 0.985372593939745, "calibration/buffer_distribution_entropy": 0.9989997082680914, "calibration/confidence_entropy": 0.5112285743157771, "calibration/coverage@0%": 0.001953889432485323, "calibration/coverage@1%": 0.001953889432485323, "calibration/coverage@10%": 0.027735139432485324, "calibration/coverage@15%": 0.18099544398238748, "calibration/coverage@20%": 0.32993364726027397, "calibration/coverage@25%": 0.5253309992661448, "calibration/coverage@30%": 0.7226340814579256, "calibration/coverage@5%": 0.001953889432485323, "calibration/ece": 0.1029719488611335, "calibration/mean_confidence": 0.518194728545559, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00068359375, "completions/max_length": 581.8, "completions/max_terminated_length": 581.8, "completions/mean_length": 181.2587890625, "completions/mean_terminated_length": 181.38422546386718, "completions/min_length": 13.0, "completions/min_terminated_length": 76.4, "epoch": 0.704, "grad_norm": 0.00116161466576159, "learning_rate": 1e-06, "loss": -0.0002, "num_tokens": 733269202.0, "reward": 0.9273841381072998, "reward_std": 0.08398017287254333, "rewards/accuracy_reward": 0.54111328125, "rewards/brier_reward": 0.7831794381141662, "rewards/confidence_uniqueness_reward": 0.9567205667495727, "rewards/format_reward": 0.99931640625, "rewards/frontier_aurc_reward": -0.0031599897425621747, "rewards/frontier_ece_reward": 0.004753571469336748, "rewards/frontier_entropy_batch_reward": -0.17446809411048889, "rewards/volume_coverage_0": 2.014733241351152e-09, "rewards/volume_coverage_1": 2.014733241351152e-09, "rewards/volume_coverage_10": 2.636177316439614e-08, "rewards/volume_coverage_15": 9.60477851918995e-08, "rewards/volume_coverage_20": 1.1378265980965807e-05, "rewards/volume_coverage_25": 0.0018908534664660692, "rewards/volume_coverage_5": 6.5962154960352845e-09, "signal/accuracy_reward/centered_abs_mean": 0.083428955078125, "signal/accuracy_reward/group_std_mean": 0.11187773495912552, "signal/accuracy_reward/group_zero_std_frac": 0.665625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0417144775390625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0417144775390625, "signal/advantage_abs_mean": 0.06403366848826408, "signal/advantage_pre_scale_abs_mean": 0.06403366848826408, "signal/advantage_pre_scale_std": 0.10933973640203476, "signal/advantage_std": 0.10933973640203476, "signal/brier_reward/centered_abs_mean": 0.1387535125017166, "signal/brier_reward/group_std_mean": 0.17746146619319916, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013875351287424564, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013875351287424564, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012523720599710941, "signal/confidence_uniqueness_reward/group_std_mean": 0.01720440424978733, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012523720972239972, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012523720972239972, "signal/format_reward/centered_abs_mean": 0.001287841796875, "signal/format_reward/group_std_mean": 0.0031351604498922824, "signal/format_reward/group_zero_std_frac": 0.984375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006439208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0006439208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0034087890293449164, "signal/frontier_aurc_reward/group_std_mean": 0.005871927179396152, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.260986315784976e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.260986315784976e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.009251984581351281, "signal/frontier_ece_reward/group_std_mean": 0.012797567993402481, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009251985000446438, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009251985000446438, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.252427738904953, "signal/frontier_entropy_batch_reward/group_std_mean": 0.33083915114402773, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.025242774933576583, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.025242774933576583, "signal/volume_coverage_0/centered_abs_mean": 5.998387475614209e-09, "signal/volume_coverage_0/group_std_mean": 7.584478556310614e-09, "signal/volume_coverage_0/group_zero_std_frac": 0.765625, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 5.998387564432051e-10, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 5.998387564432051e-10, "signal/volume_coverage_1/centered_abs_mean": 5.998387475614209e-09, "signal/volume_coverage_1/group_std_mean": 7.584478556310614e-09, "signal/volume_coverage_1/group_zero_std_frac": 0.765625, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 5.998387564432051e-10, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 5.998387564432051e-10, "signal/volume_coverage_10/centered_abs_mean": 5.079258258433583e-08, "signal/volume_coverage_10/group_std_mean": 6.501357034949251e-08, "signal/volume_coverage_10/group_zero_std_frac": 0.5375, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 5.079258391660346e-09, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 5.079258391660346e-09, "signal/volume_coverage_15/centered_abs_mean": 2.757137721687286e-07, "signal/volume_coverage_15/group_std_mean": 3.5040081058923533e-07, "signal/volume_coverage_15/group_zero_std_frac": 0.340625, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 2.757137842479551e-08, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 2.757137842479551e-08, "signal/volume_coverage_20/centered_abs_mean": 3.360342743690126e-05, "signal/volume_coverage_20/group_std_mean": 4.258807239239104e-05, "signal/volume_coverage_20/group_zero_std_frac": 0.15, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 3.3603427255002315e-06, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 3.3603427255002315e-06, "signal/volume_coverage_25/centered_abs_mean": 0.003729742532595992, "signal/volume_coverage_25/group_std_mean": 0.004862392600625753, "signal/volume_coverage_25/group_zero_std_frac": 0.0, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.0003729742602445185, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 0.0003729742602445185, "signal/volume_coverage_5/centered_abs_mean": 1.4848486706853237e-08, "signal/volume_coverage_5/group_std_mean": 1.8887312158710756e-08, "signal/volume_coverage_5/group_zero_std_frac": 0.728125, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 1.484848732857813e-09, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 1.484848732857813e-09, "step": 220 }, { "calibration/aurc": 0.26982895429920506, "calibration/batch_distribution_entropy": 0.9920044552136869, "calibration/buffer_distribution_entropy": 0.9989689610022271, "calibration/confidence_entropy": 0.5077218693296869, "calibration/coverage@0%": 0.009765625, "calibration/coverage@1%": 0.009765625, "calibration/coverage@10%": 0.146484375, "calibration/coverage@15%": 0.301171875, "calibration/coverage@20%": 0.380078125, "calibration/coverage@25%": 0.487109375, "calibration/coverage@30%": 0.6015625, "calibration/coverage@5%": 0.02109375, "calibration/ece": 0.13914586113741415, "calibration/mean_confidence": 0.5164801231632847, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00048828125, "completions/max_length": 691.8, "completions/max_terminated_length": 691.8, "completions/mean_length": 181.493359375, "completions/mean_terminated_length": 181.58330993652345, "completions/min_length": 46.4, "completions/min_terminated_length": 77.0, "epoch": 0.72, "grad_norm": 0.001211289200000465, "learning_rate": 1e-06, "loss": 0.0001, "num_tokens": 750137550.0, "reward": 0.9408392310142517, "reward_std": 0.08822100460529328, "rewards/accuracy_reward": 0.56865234375, "rewards/brier_reward": 0.7844376921653747, "rewards/confidence_uniqueness_reward": 0.956852662563324, "rewards/format_reward": 0.99951171875, "rewards/frontier_aurc_reward": -0.0027199994772672655, "rewards/frontier_ece_reward": 0.004829889070242644, "rewards/frontier_entropy_batch_reward": -0.18062590062618256, "rewards/volume_coverage_0": 2.3184011668786474e-09, "rewards/volume_coverage_1": 2.3184011668786474e-09, "rewards/volume_coverage_10": 2.2449454442630133e-08, "rewards/volume_coverage_15": 3.639436144453612e-07, "rewards/volume_coverage_20": 1.3974024386698148e-05, "rewards/volume_coverage_25": 0.002402997249737382, "rewards/volume_coverage_5": 4.878357745541351e-09, "signal/accuracy_reward/centered_abs_mean": 0.092950439453125, "signal/accuracy_reward/group_std_mean": 0.12576421201229096, "signal/accuracy_reward/group_zero_std_frac": 0.640625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0464752197265625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0464752197265625, "signal/advantage_abs_mean": 0.06602133065462112, "signal/advantage_pre_scale_abs_mean": 0.06602133065462112, "signal/advantage_pre_scale_std": 0.11225824356079102, "signal/advantage_std": 0.11225824356079102, "signal/brier_reward/centered_abs_mean": 0.13668132722377777, "signal/brier_reward/group_std_mean": 0.1750232219696045, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013668132573366165, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013668132573366165, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012708039209246635, "signal/confidence_uniqueness_reward/group_std_mean": 0.017119022272527217, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012708039255812764, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012708039255812764, "signal/format_reward/centered_abs_mean": 0.000933837890625, "signal/format_reward/group_std_mean": 0.002425827318802476, "signal/format_reward/group_zero_std_frac": 0.9875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004669189453125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0004669189453125, "signal/frontier_aurc_reward/centered_abs_mean": 0.003009202517569065, "signal/frontier_aurc_reward/group_std_mean": 0.005182502605021, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.761503321584314e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.761503321584314e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.009087760373950005, "signal/frontier_ece_reward/group_std_mean": 0.012493956461548805, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009087760234251618, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009087760234251618, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2623806893825531, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3368456959724426, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02623806968331337, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02623806968331337, "signal/volume_coverage_0/centered_abs_mean": 5.69514373438551e-09, "signal/volume_coverage_0/group_std_mean": 7.23545801051273e-09, "signal/volume_coverage_0/group_zero_std_frac": 0.76875, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 5.695143467931985e-10, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 5.695143467931985e-10, "signal/volume_coverage_1/centered_abs_mean": 5.69514373438551e-09, "signal/volume_coverage_1/group_std_mean": 7.23545801051273e-09, "signal/volume_coverage_1/group_zero_std_frac": 0.76875, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 5.695143467931985e-10, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 5.695143467931985e-10, "signal/volume_coverage_10/centered_abs_mean": 6.64894066915167e-08, "signal/volume_coverage_10/group_std_mean": 8.501251524961617e-08, "signal/volume_coverage_10/group_zero_std_frac": 0.53125, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 6.6489410199821465e-09, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 6.6489410199821465e-09, "signal/volume_coverage_15/centered_abs_mean": 4.1270519659519776e-07, "signal/volume_coverage_15/group_std_mean": 5.180600197718377e-07, "signal/volume_coverage_15/group_zero_std_frac": 0.30625, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 4.1270518913449904e-08, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 4.1270518913449904e-08, "signal/volume_coverage_20/centered_abs_mean": 4.9445707554696126e-05, "signal/volume_coverage_20/group_std_mean": 6.335518482956105e-05, "signal/volume_coverage_20/group_zero_std_frac": 0.1, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 4.94457085551403e-06, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 4.94457085551403e-06, "signal/volume_coverage_25/centered_abs_mean": 0.004809588473290205, "signal/volume_coverage_25/group_std_mean": 0.006234651803970337, "signal/volume_coverage_25/group_zero_std_frac": 0.0, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.0004809588834177703, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 0.0004809588834177703, "signal/volume_coverage_5/centered_abs_mean": 1.8328495698938242e-08, "signal/volume_coverage_5/group_std_mean": 2.352717913822744e-08, "signal/volume_coverage_5/group_zero_std_frac": 0.746875, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 1.832849616523191e-09, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 1.832849616523191e-09, "step": 225 }, { "calibration/aurc": 0.2828847264622761, "calibration/batch_distribution_entropy": 0.9849187010660106, "calibration/buffer_distribution_entropy": 0.9990935703753154, "calibration/confidence_entropy": 0.49405605315020173, "calibration/coverage@0%": 0.005859375, "calibration/coverage@1%": 0.005859375, "calibration/coverage@10%": 0.05859375, "calibration/coverage@15%": 0.121875, "calibration/coverage@20%": 0.391015625, "calibration/coverage@25%": 0.50078125, "calibration/coverage@30%": 0.611328125, "calibration/coverage@5%": 0.005859375, "calibration/ece": 0.13080733854407284, "calibration/mean_confidence": 0.5411869919490867, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 672.8, "completions/max_terminated_length": 672.8, "completions/mean_length": 181.01396484375, "completions/mean_terminated_length": 181.0318176269531, "completions/min_length": 59.2, "completions/min_terminated_length": 75.6, "epoch": 0.736, "grad_norm": 0.001337669906206429, "learning_rate": 1e-06, "loss": -0.0002, "num_tokens": 766930717.0, "reward": 0.9395357966423035, "reward_std": 0.08629318326711655, "rewards/accuracy_reward": 0.55966796875, "rewards/brier_reward": 0.7854518890380859, "rewards/confidence_uniqueness_reward": 0.9589093208312989, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.003115908848121762, "rewards/frontier_ece_reward": 0.005211750976741314, "rewards/frontier_entropy_batch_reward": -0.15451081991195678, "rewards/volume_coverage_0": 2.006316779379347e-09, "rewards/volume_coverage_1": 2.006316779379347e-09, "rewards/volume_coverage_10": 1.695187847872148e-08, "rewards/volume_coverage_15": 9.291495466356991e-08, "rewards/volume_coverage_20": 5.661431605403777e-05, "rewards/volume_coverage_25": 0.003265319438651204, "rewards/volume_coverage_5": 5.1611755313984984e-09, "signal/accuracy_reward/centered_abs_mean": 0.091168212890625, "signal/accuracy_reward/group_std_mean": 0.12514048665761948, "signal/accuracy_reward/group_zero_std_frac": 0.628125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0455841064453125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0455841064453125, "signal/advantage_abs_mean": 0.06469556018710136, "signal/advantage_pre_scale_abs_mean": 0.06469556018710136, "signal/advantage_pre_scale_std": 0.10994518399238587, "signal/advantage_std": 0.10994518399238587, "signal/brier_reward/centered_abs_mean": 0.14189725518226623, "signal/brier_reward/group_std_mean": 0.18289188742637635, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014189725369215011, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014189725369215011, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.011633879318833352, "signal/confidence_uniqueness_reward/group_std_mean": 0.015210827626287938, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001163387973792851, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001163387973792851, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0034561751410365103, "signal/frontier_aurc_reward/group_std_mean": 0.005828452110290527, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.320219159126282e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.320219159126282e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.009344759956002235, "signal/frontier_ece_reward/group_std_mean": 0.01285859029740095, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009344760212115944, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009344760212115944, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.23316074311733245, "signal/frontier_entropy_batch_reward/group_std_mean": 0.306648051738739, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.023316074162721634, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.023316074162721634, "signal/volume_coverage_0/centered_abs_mean": 5.230916322318535e-09, "signal/volume_coverage_0/group_std_mean": 6.707384425652663e-09, "signal/volume_coverage_0/group_zero_std_frac": 0.771875, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 5.230916477749759e-10, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 5.230916477749759e-10, "signal/volume_coverage_1/centered_abs_mean": 5.230916322318535e-09, "signal/volume_coverage_1/group_std_mean": 6.707384425652663e-09, "signal/volume_coverage_1/group_zero_std_frac": 0.771875, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 5.230916477749759e-10, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 5.230916477749759e-10, "signal/volume_coverage_10/centered_abs_mean": 6.583693021866565e-08, "signal/volume_coverage_10/group_std_mean": 8.430675393356069e-08, "signal/volume_coverage_10/group_zero_std_frac": 0.471875, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 6.583693457073991e-09, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 6.583693457073991e-09, "signal/volume_coverage_15/centered_abs_mean": 3.5697502198672737e-07, "signal/volume_coverage_15/group_std_mean": 4.58353112264831e-07, "signal/volume_coverage_15/group_zero_std_frac": 0.20625, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 3.569749988940884e-08, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 3.569749988940884e-08, "signal/volume_coverage_20/centered_abs_mean": 0.00013909742992836981, "signal/volume_coverage_20/group_std_mean": 0.00017906517896335573, "signal/volume_coverage_20/group_zero_std_frac": 0.009375, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 1.390974321111571e-05, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 1.390974321111571e-05, "signal/volume_coverage_25/centered_abs_mean": 0.005795108247548341, "signal/volume_coverage_25/group_std_mean": 0.007614379748702049, "signal/volume_coverage_25/group_zero_std_frac": 0.0, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.0005795108270831406, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 0.0005795108270831406, "signal/volume_coverage_5/centered_abs_mean": 1.3240050122931279e-08, "signal/volume_coverage_5/group_std_mean": 1.6939196179066583e-08, "signal/volume_coverage_5/group_zero_std_frac": 0.665625, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 1.3240050056317898e-09, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 1.3240050056317898e-09, "step": 230 }, { "calibration/aurc": 0.3052104139588967, "calibration/batch_distribution_entropy": 0.9845980873959596, "calibration/buffer_distribution_entropy": 0.9992672107871774, "calibration/confidence_entropy": 0.481453651763142, "calibration/coverage@0%": 0.005867783757338552, "calibration/coverage@1%": 0.005867783757338552, "calibration/coverage@10%": 0.02501605308219178, "calibration/coverage@15%": 0.0840913955479452, "calibration/coverage@20%": 0.28422975782778864, "calibration/coverage@25%": 0.4041883255870841, "calibration/coverage@30%": 0.5483564701565558, "calibration/coverage@5%": 0.005867783757338552, "calibration/ece": 0.13235262642024298, "calibration/mean_confidence": 0.4699991623427877, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000390625, "completions/max_length": 519.8, "completions/max_terminated_length": 519.8, "completions/mean_length": 183.3912109375, "completions/mean_terminated_length": 183.4627258300781, "completions/min_length": 17.0, "completions/min_terminated_length": 80.6, "epoch": 0.752, "grad_norm": 0.0008947931928560138, "learning_rate": 1e-06, "loss": 0.0002, "num_tokens": 784035843.0, "reward": 0.937898600101471, "reward_std": 0.08305409550666809, "rewards/accuracy_reward": 0.56201171875, "rewards/brier_reward": 0.7806243419647216, "rewards/confidence_uniqueness_reward": 0.956540560722351, "rewards/format_reward": 0.999609375, "rewards/frontier_aurc_reward": -0.0032437034416943787, "rewards/frontier_ece_reward": 0.0044742335565388204, "rewards/frontier_entropy_batch_reward": -0.17429540455341339, "rewards/volume_coverage_0": 4.7879359588165474e-09, "rewards/volume_coverage_1": 4.7879359588165474e-09, "rewards/volume_coverage_10": 8.599188774383038e-08, "rewards/volume_coverage_15": 3.474765513544753e-07, "rewards/volume_coverage_20": 7.103228454070631e-05, "rewards/volume_coverage_25": 0.003870873898267746, "rewards/volume_coverage_5": 1.2054030928965177e-08, "signal/accuracy_reward/centered_abs_mean": 0.083892822265625, "signal/accuracy_reward/group_std_mean": 0.11160431355237961, "signal/accuracy_reward/group_zero_std_frac": 0.678125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0419464111328125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0419464111328125, "signal/advantage_abs_mean": 0.06367235779762268, "signal/advantage_pre_scale_abs_mean": 0.06367235779762268, "signal/advantage_pre_scale_std": 0.10790289640426635, "signal/advantage_std": 0.10790289640426635, "signal/brier_reward/centered_abs_mean": 0.1400133416056633, "signal/brier_reward/group_std_mean": 0.17920613586902617, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014001334644854068, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014001334644854068, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012570606358349324, "signal/confidence_uniqueness_reward/group_std_mean": 0.016902846470475196, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012570605846121906, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012570605846121906, "signal/format_reward/centered_abs_mean": 0.0007568359375, "signal/format_reward/group_std_mean": 0.0022097086533904076, "signal/format_reward/group_zero_std_frac": 0.9875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00037841796875, "signal/frontier_aurc_reward/centered_abs_mean": 0.003417500341311097, "signal/frontier_aurc_reward/group_std_mean": 0.005739410500973463, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.271875586709939e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.271875586709939e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.009020579047501087, "signal/frontier_ece_reward/group_std_mean": 0.012551595270633698, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009020578931085765, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009020578931085765, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2512552380561829, "signal/frontier_entropy_batch_reward/group_std_mean": 0.32768315076828003, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.025125524401664732, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.025125524401664732, "signal/volume_coverage_0/centered_abs_mean": 7.3125370647630915e-09, "signal/volume_coverage_0/group_std_mean": 9.442500292777822e-09, "signal/volume_coverage_0/group_zero_std_frac": 0.690625, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 7.312537242398776e-10, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 7.312537242398776e-10, "signal/volume_coverage_1/centered_abs_mean": 7.3125370647630915e-09, "signal/volume_coverage_1/group_std_mean": 9.442500292777822e-09, "signal/volume_coverage_1/group_zero_std_frac": 0.690625, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 7.312537242398776e-10, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 7.312537242398776e-10, "signal/volume_coverage_10/centered_abs_mean": 1.515987818834219e-07, "signal/volume_coverage_10/group_std_mean": 1.9257330734490098e-07, "signal/volume_coverage_10/group_zero_std_frac": 0.4625, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 1.5159878508086422e-08, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 1.5159878508086422e-08, "signal/volume_coverage_15/centered_abs_mean": 6.380775175784948e-07, "signal/volume_coverage_15/group_std_mean": 8.159376591265754e-07, "signal/volume_coverage_15/group_zero_std_frac": 0.296875, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 6.380775232628367e-08, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 6.380775232628367e-08, "signal/volume_coverage_20/centered_abs_mean": 0.00014088547613937407, "signal/volume_coverage_20/group_std_mean": 0.0001808962260838598, "signal/volume_coverage_20/group_zero_std_frac": 0.0125, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 1.4088547322899103e-05, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 1.4088547322899103e-05, "signal/volume_coverage_25/centered_abs_mean": 0.00589982932433486, "signal/volume_coverage_25/group_std_mean": 0.0076646491885185245, "signal/volume_coverage_25/group_zero_std_frac": 0.0, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.0005899829440750182, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 0.0005899829440750182, "signal/volume_coverage_5/centered_abs_mean": 2.8907768445662896e-08, "signal/volume_coverage_5/group_std_mean": 3.686327421092983e-08, "signal/volume_coverage_5/group_zero_std_frac": 0.6125, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 2.8907769289432395e-09, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 2.8907769289432395e-09, "step": 235 }, { "calibration/aurc": 0.28967973925663304, "calibration/batch_distribution_entropy": 0.9882842527096105, "calibration/buffer_distribution_entropy": 0.999540690858549, "calibration/confidence_entropy": 0.5116336247799427, "calibration/coverage@0%": 0.014069379892367905, "calibration/coverage@1%": 0.014069379892367905, "calibration/coverage@10%": 0.19336701932485323, "calibration/coverage@15%": 0.2898513943248532, "calibration/coverage@20%": 0.36487509173189825, "calibration/coverage@25%": 0.441084423923679, "calibration/coverage@30%": 0.5059434625733855, "calibration/coverage@5%": 0.06680375489236791, "calibration/ece": 0.1683775357806439, "calibration/mean_confidence": 0.48505974704758303, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000390625, "completions/max_length": 718.4, "completions/max_terminated_length": 718.4, "completions/mean_length": 191.35693359375, "completions/mean_terminated_length": 191.43230590820312, "completions/min_length": 49.4, "completions/min_terminated_length": 83.8, "epoch": 0.768, "grad_norm": 0.0010617813095450401, "learning_rate": 1e-06, "loss": -0.0001, "num_tokens": 800928042.0, "reward": 0.9159645438194275, "reward_std": 0.08468157351016999, "rewards/accuracy_reward": 0.516015625, "rewards/brier_reward": 0.7906257748603821, "rewards/confidence_uniqueness_reward": 0.954690670967102, "rewards/format_reward": 0.999609375, "rewards/frontier_aurc_reward": -0.0030185111332684754, "rewards/frontier_ece_reward": 0.004940245300531387, "rewards/frontier_entropy_batch_reward": -0.17311182320117952, "rewards/volume_coverage_0": 6.364088045440752e-09, "rewards/volume_coverage_1": 6.364088045440752e-09, "rewards/volume_coverage_10": 2.1145389084153977e-08, "rewards/volume_coverage_15": 3.079553749785191e-07, "rewards/volume_coverage_20": 8.821834267109807e-05, "rewards/volume_coverage_25": 0.004664366459473967, "rewards/volume_coverage_5": 1.2051903697241073e-08, "signal/accuracy_reward/centered_abs_mean": 0.08985595703125, "signal/accuracy_reward/group_std_mean": 0.11778665035963058, "signal/accuracy_reward/group_zero_std_frac": 0.665625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.044927978515625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.044927978515625, "signal/advantage_abs_mean": 0.06518116667866707, "signal/advantage_pre_scale_abs_mean": 0.06518116667866707, "signal/advantage_pre_scale_std": 0.11050616502761841, "signal/advantage_std": 0.11050616502761841, "signal/brier_reward/centered_abs_mean": 0.13591494858264924, "signal/brier_reward/group_std_mean": 0.17277559041976928, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013591495528817176, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013591495528817176, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012128811329603195, "signal/confidence_uniqueness_reward/group_std_mean": 0.01648256555199623, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012128812028095125, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012128812028095125, "signal/format_reward/centered_abs_mean": 0.0007568359375, "signal/format_reward/group_std_mean": 0.0022097086533904076, "signal/format_reward/group_zero_std_frac": 0.9875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00037841796875, "signal/frontier_aurc_reward/centered_abs_mean": 0.00310793062672019, "signal/frontier_aurc_reward/group_std_mean": 0.005243441369384527, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.884913166984916e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.884913166984916e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.008205472864210606, "signal/frontier_ece_reward/group_std_mean": 0.011111721023917198, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0008205473190173507, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0008205473190173507, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.24350886940956115, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3188900947570801, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.024350887164473534, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.024350887164473534, "signal/volume_coverage_0/centered_abs_mean": 1.2351764944185106e-08, "signal/volume_coverage_0/group_std_mean": 1.568862941780935e-08, "signal/volume_coverage_0/group_zero_std_frac": 0.55625, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 1.2351765832363526e-09, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 1.2351765832363526e-09, "signal/volume_coverage_1/centered_abs_mean": 1.2351764944185106e-08, "signal/volume_coverage_1/group_std_mean": 1.568862941780935e-08, "signal/volume_coverage_1/group_zero_std_frac": 0.55625, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 1.2351765832363526e-09, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 1.2351765832363526e-09, "signal/volume_coverage_10/centered_abs_mean": 8.648569576052978e-08, "signal/volume_coverage_10/group_std_mean": 1.1136818187651442e-07, "signal/volume_coverage_10/group_zero_std_frac": 0.403125, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 8.64856977145223e-09, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 8.64856977145223e-09, "signal/volume_coverage_15/centered_abs_mean": 5.247908120509237e-07, "signal/volume_coverage_15/group_std_mean": 6.701182428514585e-07, "signal/volume_coverage_15/group_zero_std_frac": 0.23125, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 5.2479082057743655e-08, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 5.2479082057743655e-08, "signal/volume_coverage_20/centered_abs_mean": 0.0001288706494960934, "signal/volume_coverage_20/group_std_mean": 0.00016244519356405361, "signal/volume_coverage_20/group_zero_std_frac": 0.090625, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 1.2887064895039656e-05, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 1.2887064895039656e-05, "signal/volume_coverage_25/centered_abs_mean": 0.00626561539247632, "signal/volume_coverage_25/group_std_mean": 0.008005017414689064, "signal/volume_coverage_25/group_zero_std_frac": 0.0, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.0006265615345910191, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 0.0006265615345910191, "signal/volume_coverage_5/centered_abs_mean": 2.908500817966342e-08, "signal/volume_coverage_5/group_std_mean": 3.7089595750217085e-08, "signal/volume_coverage_5/group_zero_std_frac": 0.521875, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 2.9085007291485e-09, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 2.9085007291485e-09, "step": 240 }, { "calibration/aurc": 0.3409073022681162, "calibration/batch_distribution_entropy": 0.9818591637152853, "calibration/buffer_distribution_entropy": 0.999580250636168, "calibration/confidence_entropy": 0.4921108300165389, "calibration/coverage@0%": 0.000390625, "calibration/coverage@1%": 0.000390625, "calibration/coverage@10%": 0.14609375, "calibration/coverage@15%": 0.23587840741650296, "calibration/coverage@20%": 0.29508610633595284, "calibration/coverage@25%": 0.3417247360019647, "calibration/coverage@30%": 0.39851731335952845, "calibration/coverage@5%": 0.0671875, "calibration/ece": 0.14462367827428624, "calibration/mean_confidence": 0.5428684503415782, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0005859375, "completions/max_length": 606.2, "completions/max_terminated_length": 606.2, "completions/mean_length": 193.3060546875, "completions/mean_terminated_length": 193.4197784423828, "completions/min_length": 34.2, "completions/min_terminated_length": 87.4, "epoch": 0.784, "grad_norm": 0.0009081112220883369, "learning_rate": 1e-06, "loss": -0.0003, "num_tokens": 818081864.0, "reward": 0.9324936270713806, "reward_std": 0.08699233829975128, "rewards/accuracy_reward": 0.55498046875, "rewards/brier_reward": 0.7820330500602722, "rewards/confidence_uniqueness_reward": 0.9537600040435791, "rewards/format_reward": 0.9994140625, "rewards/frontier_aurc_reward": -0.003351992089301348, "rewards/frontier_ece_reward": 0.00407652840949595, "rewards/frontier_entropy_batch_reward": -0.19070782661437988, "rewards/volume_coverage_0": 3.5432023204329256e-09, "rewards/volume_coverage_1": 3.5432023204329256e-09, "rewards/volume_coverage_10": 2.968028498528952e-08, "rewards/volume_coverage_15": 3.8283018959361926e-07, "rewards/volume_coverage_20": 0.00011098165159637575, "rewards/volume_coverage_25": 0.004109382582828403, "rewards/volume_coverage_5": 1.2307213759221724e-08, "signal/accuracy_reward/centered_abs_mean": 0.086224365234375, "signal/accuracy_reward/group_std_mean": 0.1191135048866272, "signal/accuracy_reward/group_zero_std_frac": 0.640625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0431121826171875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0431121826171875, "signal/advantage_abs_mean": 0.06541964039206505, "signal/advantage_pre_scale_abs_mean": 0.06541964039206505, "signal/advantage_pre_scale_std": 0.11036005318164825, "signal/advantage_std": 0.11036005318164825, "signal/brier_reward/centered_abs_mean": 0.1302357941865921, "signal/brier_reward/group_std_mean": 0.16782908141613007, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013023579306900502, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013023579306900502, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012619849853217601, "signal/confidence_uniqueness_reward/group_std_mean": 0.016879118233919143, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012619849760085345, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012619849760085345, "signal/format_reward/centered_abs_mean": 0.0010986328125, "signal/format_reward/group_std_mean": 0.0025827332865446806, "signal/format_reward/group_zero_std_frac": 0.9875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00054931640625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00054931640625, "signal/frontier_aurc_reward/centered_abs_mean": 0.003462395863607526, "signal/frontier_aurc_reward/group_std_mean": 0.00611389996483922, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.327995120547712e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.327995120547712e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.007947401888668538, "signal/frontier_ece_reward/group_std_mean": 0.010621737688779831, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007947401842102409, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007947401842102409, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2670396983623505, "signal/frontier_entropy_batch_reward/group_std_mean": 0.34181196689605714, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02670396976172924, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02670396976172924, "signal/volume_coverage_0/centered_abs_mean": 7.350568687058967e-09, "signal/volume_coverage_0/group_std_mean": 9.562578107136233e-09, "signal/volume_coverage_0/group_zero_std_frac": 0.65, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 7.350568487218823e-10, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 7.350568487218823e-10, "signal/volume_coverage_1/centered_abs_mean": 7.350568687058967e-09, "signal/volume_coverage_1/group_std_mean": 9.562578107136233e-09, "signal/volume_coverage_1/group_zero_std_frac": 0.65, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 7.350568487218823e-10, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 7.350568487218823e-10, "signal/volume_coverage_10/centered_abs_mean": 4.7587561269324394e-08, "signal/volume_coverage_10/group_std_mean": 6.18870934943061e-08, "signal/volume_coverage_10/group_zero_std_frac": 0.534375, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 4.758756055878166e-09, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 4.758756055878166e-09, "signal/volume_coverage_15/centered_abs_mean": 9.245440821814555e-07, "signal/volume_coverage_15/group_std_mean": 1.2132376781437415e-06, "signal/volume_coverage_15/group_zero_std_frac": 0.15, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 9.245441106031649e-08, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 9.245441106031649e-08, "signal/volume_coverage_20/centered_abs_mean": 0.00030603163759224117, "signal/volume_coverage_20/group_std_mean": 0.00039949056517798456, "signal/volume_coverage_20/group_zero_std_frac": 0.05, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 3.0603163759224115e-05, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 3.0603163759224115e-05, "signal/volume_coverage_25/centered_abs_mean": 0.006639927253127098, "signal/volume_coverage_25/group_std_mean": 0.008674658834934235, "signal/volume_coverage_25/group_zero_std_frac": 0.0, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.0006639927276410162, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 0.0006639927276410162, "signal/volume_coverage_5/centered_abs_mean": 1.897898034286527e-08, "signal/volume_coverage_5/group_std_mean": 2.4780702645443853e-08, "signal/volume_coverage_5/group_zero_std_frac": 0.571875, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 1.8978980609318796e-09, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 1.8978980609318796e-09, "step": 245 }, { "calibration/aurc": 0.2528686613631735, "calibration/batch_distribution_entropy": 0.9800658866295919, "calibration/buffer_distribution_entropy": 0.9994979177014567, "calibration/confidence_entropy": 0.5019152426365978, "calibration/coverage@0%": 0.004694416007288473, "calibration/coverage@1%": 0.004694416007288473, "calibration/coverage@10%": 0.12131790929280242, "calibration/coverage@15%": 0.26051909660693096, "calibration/coverage@20%": 0.44419644060973545, "calibration/coverage@25%": 0.5879300805507188, "calibration/coverage@30%": 0.6937708383091669, "calibration/coverage@5%": 0.004694416007288473, "calibration/ece": 0.1269755624021652, "calibration/mean_confidence": 0.4990723125611093, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0005859375, "completions/max_length": 592.6, "completions/max_terminated_length": 592.6, "completions/mean_length": 199.9748046875, "completions/mean_terminated_length": 200.09163818359374, "completions/min_length": 55.4, "completions/min_terminated_length": 89.6, "epoch": 0.8, "grad_norm": 0.0012367883464321494, "learning_rate": 1e-06, "loss": -0.0002, "num_tokens": 835140166.0, "reward": 0.9509989142417907, "reward_std": 0.08965336829423905, "rewards/accuracy_reward": 0.593359375, "rewards/brier_reward": 0.7926764488220215, "rewards/confidence_uniqueness_reward": 0.9519682645797729, "rewards/format_reward": 0.99931640625, "rewards/frontier_aurc_reward": -0.0032140606548637153, "rewards/frontier_ece_reward": 0.0039047694765031336, "rewards/frontier_entropy_batch_reward": -0.20638387203216552, "rewards/volume_coverage_0": 2.2559438539460076e-09, "rewards/volume_coverage_1": 2.2559438539460076e-09, "rewards/volume_coverage_10": 4.5241598911616165e-08, "rewards/volume_coverage_15": 9.52254220010218e-07, "rewards/volume_coverage_20": 0.00027118420985061675, "rewards/volume_coverage_25": 0.004574334062635898, "rewards/volume_coverage_5": 7.028363846472985e-09, "signal/accuracy_reward/centered_abs_mean": 0.0971435546875, "signal/accuracy_reward/group_std_mean": 0.12513308376073837, "signal/accuracy_reward/group_zero_std_frac": 0.65, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04857177734375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04857177734375, "signal/advantage_abs_mean": 0.06999209970235824, "signal/advantage_pre_scale_abs_mean": 0.06999209970235824, "signal/advantage_pre_scale_std": 0.11670937389135361, "signal/advantage_std": 0.11670937389135361, "signal/brier_reward/centered_abs_mean": 0.1270011395215988, "signal/brier_reward/group_std_mean": 0.16291393637657164, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012700113654136657, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012700113654136657, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.013134096190333367, "signal/confidence_uniqueness_reward/group_std_mean": 0.01759086810052395, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013134096516296268, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013134096516296268, "signal/format_reward/centered_abs_mean": 0.001251220703125, "signal/format_reward/group_std_mean": 0.002707315143197775, "signal/format_reward/group_zero_std_frac": 0.9875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006256103515625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0006256103515625, "signal/frontier_aurc_reward/centered_abs_mean": 0.003635000390931964, "signal/frontier_aurc_reward/group_std_mean": 0.006264658644795418, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.543750619632192e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.543750619632192e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.007841948978602886, "signal/frontier_ece_reward/group_std_mean": 0.010542181693017483, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007841949234716594, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007841949234716594, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.274287748336792, "signal/frontier_entropy_batch_reward/group_std_mean": 0.34808642268180845, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.027428776770830155, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027428776770830155, "signal/volume_coverage_0/centered_abs_mean": 7.121829703748972e-09, "signal/volume_coverage_0/group_std_mean": 9.051382132696518e-09, "signal/volume_coverage_0/group_zero_std_frac": 0.73125, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 7.121830125633721e-10, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 7.121830125633721e-10, "signal/volume_coverage_1/centered_abs_mean": 7.121829703748972e-09, "signal/volume_coverage_1/group_std_mean": 9.051382132696518e-09, "signal/volume_coverage_1/group_zero_std_frac": 0.73125, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 7.121830125633721e-10, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 7.121830125633721e-10, "signal/volume_coverage_10/centered_abs_mean": 1.477469595556613e-07, "signal/volume_coverage_10/group_std_mean": 1.8689709406771727e-07, "signal/volume_coverage_10/group_zero_std_frac": 0.446875, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 1.4774697110198076e-08, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 1.4774697110198076e-08, "signal/volume_coverage_15/centered_abs_mean": 2.0268829473479854e-06, "signal/volume_coverage_15/group_std_mean": 2.5734173163982634e-06, "signal/volume_coverage_15/group_zero_std_frac": 0.290625, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 2.026883009875746e-07, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 2.026883009875746e-07, "signal/volume_coverage_20/centered_abs_mean": 0.0006177243252750486, "signal/volume_coverage_20/group_std_mean": 0.000787645042873919, "signal/volume_coverage_20/group_zero_std_frac": 0.25, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 6.177243412821553e-05, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 6.177243412821553e-05, "signal/volume_coverage_25/centered_abs_mean": 0.007191289030015468, "signal/volume_coverage_25/group_std_mean": 0.009311573766171932, "signal/volume_coverage_25/group_zero_std_frac": 0.0, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.0007191289332695305, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 0.0007191289332695305, "signal/volume_coverage_5/centered_abs_mean": 2.6008427056467555e-08, "signal/volume_coverage_5/group_std_mean": 3.3191895454365294e-08, "signal/volume_coverage_5/group_zero_std_frac": 0.703125, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 2.6008427411738922e-09, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 2.6008427411738922e-09, "step": 250 }, { "epoch": 0.8, "eval_calibration/aurc": 0.4908944294407222, "eval_calibration/batch_distribution_entropy": 0.9348605742427996, "eval_calibration/buffer_distribution_entropy": 0.9995711557006871, "eval_calibration/confidence_entropy": 0.505417142366378, "eval_calibration/coverage@0%": 0.0546875, "eval_calibration/coverage@1%": 0.0546875, "eval_calibration/coverage@10%": 0.0546875, "eval_calibration/coverage@15%": 0.0859375, "eval_calibration/coverage@20%": 0.1015625, "eval_calibration/coverage@25%": 0.1875, "eval_calibration/coverage@30%": 0.1875, "eval_calibration/coverage@5%": 0.0546875, "eval_calibration/ece": 0.2639847418086436, "eval_calibration/mean_confidence": 0.4755017740367686, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 741.5, "eval_completions/max_terminated_length": 741.5, "eval_completions/mean_length": 218.63106155395508, "eval_completions/mean_terminated_length": 218.63106155395508, "eval_completions/min_length": 108.25, "eval_completions/min_terminated_length": 108.25, "eval_loss": 0.0, "eval_num_tokens": 835140166.0, "eval_reward": 0.7909766435623169, "eval_reward_std": 0.2465236335992813, "eval_rewards/accuracy_reward": 0.447265625, "eval_rewards/brier_reward": 0.7693072855472565, "eval_rewards/confidence_uniqueness_reward": 0.89453125, "eval_rewards/format_reward": 1.0, "eval_rewards/frontier_aurc_reward": -0.004427299543749541, "eval_rewards/frontier_ece_reward": 0.003629253071267158, "eval_rewards/frontier_entropy_batch_reward": -1.0, "eval_rewards/volume_coverage_0": 6.361557280953889e-09, "eval_rewards/volume_coverage_1": 6.361557280953889e-09, "eval_rewards/volume_coverage_10": 6.214037906460135e-08, "eval_rewards/volume_coverage_15": 2.913196333054202e-07, "eval_rewards/volume_coverage_20": 0.0006583120702998713, "eval_rewards/volume_coverage_25": 0.005864958860911429, "eval_rewards/volume_coverage_5": 1.7527957307805764e-08, "eval_runtime": 31.2892, "eval_samples_per_second": 15.98, "eval_signal/accuracy_reward/centered_abs_mean": 0.4781494140625, "eval_signal/accuracy_reward/group_std_mean": 0.49640604108572006, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23907470703125, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23907470703125, "eval_signal/advantage_abs_mean": 0.23595283553004265, "eval_signal/advantage_pre_scale_abs_mean": 0.23595283553004265, "eval_signal/advantage_pre_scale_std": 0.24372952803969383, "eval_signal/advantage_std": 0.24372952803969383, "eval_signal/brier_reward/centered_abs_mean": 0.19793446362018585, "eval_signal/brier_reward/group_std_mean": 0.24987414851784706, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019793446641415358, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.019793446641415358, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0426177978515625, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.049744920805096626, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004261779831722379, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004261779831722379, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.006218503811396658, "eval_signal/frontier_aurc_reward/group_std_mean": 0.013023799983784556, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 7.773129982524551e-05, "eval_signal/frontier_aurc_reward/weight": 0.012500000186264515, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 7.773129982524551e-05, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.010007256641983986, "eval_signal/frontier_ece_reward/group_std_mean": 0.013597892131656408, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010007255914388224, "eval_signal/frontier_ece_reward/weight": 0.10000000149011612, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010007255914388224, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0, "eval_signal/volume_coverage_0/centered_abs_mean": 1.7614849578961866e-08, "eval_signal/volume_coverage_0/group_std_mean": 2.2101196428536696e-08, "eval_signal/volume_coverage_0/group_zero_std_frac": 0.5, "eval_signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 1.7614848746294598e-09, "eval_signal/volume_coverage_0/weight": 0.10000000149011612, "eval_signal/volume_coverage_0/weighted_centered_abs_mean": 1.7614848746294598e-09, "eval_signal/volume_coverage_1/centered_abs_mean": 1.7614849578961866e-08, "eval_signal/volume_coverage_1/group_std_mean": 2.2101196428536696e-08, "eval_signal/volume_coverage_1/group_zero_std_frac": 0.5, "eval_signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 1.7614848746294598e-09, "eval_signal/volume_coverage_1/weight": 0.10000000149011612, "eval_signal/volume_coverage_1/weighted_centered_abs_mean": 1.7614848746294598e-09, "eval_signal/volume_coverage_10/centered_abs_mean": 1.73200039199628e-07, "eval_signal/volume_coverage_10/group_std_mean": 2.176570355061358e-07, "eval_signal/volume_coverage_10/group_zero_std_frac": 0.5, "eval_signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 1.732000431964309e-08, "eval_signal/volume_coverage_10/weight": 0.10000000149011612, "eval_signal/volume_coverage_10/weighted_centered_abs_mean": 1.732000431964309e-08, "eval_signal/volume_coverage_15/centered_abs_mean": 1.1045231005368805e-06, "eval_signal/volume_coverage_15/group_std_mean": 1.4119637938847518e-06, "eval_signal/volume_coverage_15/group_zero_std_frac": 0.5, "eval_signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 1.1045231707029757e-07, "eval_signal/volume_coverage_15/weight": 0.10000000149011612, "eval_signal/volume_coverage_15/weighted_centered_abs_mean": 1.1045231707029757e-07, "eval_signal/volume_coverage_20/centered_abs_mean": 0.0016010731924325228, "eval_signal/volume_coverage_20/group_std_mean": 0.002093737944960594, "eval_signal/volume_coverage_20/group_zero_std_frac": 0.125, "eval_signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 0.00016010730905691162, "eval_signal/volume_coverage_20/weight": 0.10000000149011612, "eval_signal/volume_coverage_20/weighted_centered_abs_mean": 0.00016010730905691162, "eval_signal/volume_coverage_25/centered_abs_mean": 0.013395349029451609, "eval_signal/volume_coverage_25/group_std_mean": 0.018266907893121243, "eval_signal/volume_coverage_25/group_zero_std_frac": 0.0, "eval_signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.001339534908765927, "eval_signal/volume_coverage_25/weight": 0.10000000149011612, "eval_signal/volume_coverage_25/weighted_centered_abs_mean": 0.001339534908765927, "eval_signal/volume_coverage_5/centered_abs_mean": 4.967059652472017e-08, "eval_signal/volume_coverage_5/group_std_mean": 6.237662741881422e-08, "eval_signal/volume_coverage_5/group_zero_std_frac": 0.5, "eval_signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 4.967059707983168e-09, "eval_signal/volume_coverage_5/weight": 0.10000000149011612, "eval_signal/volume_coverage_5/weighted_centered_abs_mean": 4.967059707983168e-09, "eval_steps_per_second": 0.128, "step": 250 }, { "calibration/aurc": 0.23735285794686112, "calibration/batch_distribution_entropy": 0.9789768078562158, "calibration/buffer_distribution_entropy": 0.999559891129499, "calibration/confidence_entropy": 0.49745628597302466, "calibration/coverage@0%": 0.02659242099883121, "calibration/coverage@1%": 0.02659242099883121, "calibration/coverage@10%": 0.10261541338586462, "calibration/coverage@15%": 0.28655421229032024, "calibration/coverage@20%": 0.469679150414842, "calibration/coverage@25%": 0.5976479013159413, "calibration/coverage@30%": 0.7012977345284872, "calibration/coverage@5%": 0.05675342910295696, "calibration/ece": 0.1411352071071634, "calibration/mean_confidence": 0.5183968192355468, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0005859375, "completions/max_length": 833.8, "completions/max_terminated_length": 833.8, "completions/mean_length": 212.0490234375, "completions/mean_terminated_length": 212.17300720214843, "completions/min_length": 17.0, "completions/min_terminated_length": 87.0, "epoch": 0.816, "grad_norm": 0.0010728145716711879, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 852410716.0, "reward": 0.9503415584564209, "reward_std": 0.09045878946781158, "rewards/accuracy_reward": 0.58798828125, "rewards/brier_reward": 0.7771391987800598, "rewards/confidence_uniqueness_reward": 0.9539396047592164, "rewards/format_reward": 0.9994140625, "rewards/frontier_aurc_reward": -0.0030888376757502558, "rewards/frontier_ece_reward": 0.0029682864900678397, "rewards/frontier_entropy_batch_reward": -0.17172586619853974, "rewards/volume_coverage_0": 1.3925749109411355e-09, "rewards/volume_coverage_1": 1.3925749109411355e-09, "rewards/volume_coverage_10": 1.897763094449445e-08, "rewards/volume_coverage_15": 4.220935450049978e-08, "rewards/volume_coverage_20": 0.00028864066698588433, "rewards/volume_coverage_25": 0.004179897159337998, "rewards/volume_coverage_5": 1.9938326212276535e-09, "signal/accuracy_reward/centered_abs_mean": 0.099285888671875, "signal/accuracy_reward/group_std_mean": 0.13266663253307343, "signal/accuracy_reward/group_zero_std_frac": 0.61875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0496429443359375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0496429443359375, "signal/advantage_abs_mean": 0.06852349787950515, "signal/advantage_pre_scale_abs_mean": 0.06852349787950515, "signal/advantage_pre_scale_std": 0.11623869091272354, "signal/advantage_std": 0.11623869091272354, "signal/brier_reward/centered_abs_mean": 0.1350185066461563, "signal/brier_reward/group_std_mean": 0.1722848892211914, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013501851074397563, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013501851074397563, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.01146103423088789, "signal/confidence_uniqueness_reward/group_std_mean": 0.015553070977330209, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0011461034882813692, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0011461034882813692, "signal/format_reward/centered_abs_mean": 0.0010986328125, "signal/format_reward/group_std_mean": 0.0025827332865446806, "signal/format_reward/group_zero_std_frac": 0.9875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00054931640625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00054931640625, "signal/frontier_aurc_reward/centered_abs_mean": 0.0035511314868927, "signal/frontier_aurc_reward/group_std_mean": 0.00631262669339776, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.438914693309926e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.438914693309926e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.007486023474484682, "signal/frontier_ece_reward/group_std_mean": 0.010013842955231667, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007486023707315326, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007486023707315326, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.24344446063041686, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3181091547012329, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.024344447255134582, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.024344447255134582, "signal/volume_coverage_0/centered_abs_mean": 7.336063845286844e-09, "signal/volume_coverage_0/group_std_mean": 9.247960708336222e-09, "signal/volume_coverage_0/group_zero_std_frac": 0.675, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 7.336064200558212e-10, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 7.336064200558212e-10, "signal/volume_coverage_1/centered_abs_mean": 7.336063845286844e-09, "signal/volume_coverage_1/group_std_mean": 9.247960708336222e-09, "signal/volume_coverage_1/group_zero_std_frac": 0.675, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 7.336064200558212e-10, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 7.336064200558212e-10, "signal/volume_coverage_10/centered_abs_mean": 9.209922708919294e-08, "signal/volume_coverage_10/group_std_mean": 1.172154071582554e-07, "signal/volume_coverage_10/group_zero_std_frac": 0.509375, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 9.209923401698461e-09, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 9.209923401698461e-09, "signal/volume_coverage_15/centered_abs_mean": 2.9229093740923416e-07, "signal/volume_coverage_15/group_std_mean": 3.7030814752370135e-07, "signal/volume_coverage_15/group_zero_std_frac": 0.5, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 2.9229093456706325e-08, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 2.9229093456706325e-08, "signal/volume_coverage_20/centered_abs_mean": 0.001035915408283472, "signal/volume_coverage_20/group_std_mean": 0.0013216811465099453, "signal/volume_coverage_20/group_zero_std_frac": 0.05, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 0.00010359154257457703, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 0.00010359154257457703, "signal/volume_coverage_25/centered_abs_mean": 0.008257150836288928, "signal/volume_coverage_25/group_std_mean": 0.01067999266088009, "signal/volume_coverage_25/group_zero_std_frac": 0.0, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.0008257150650024414, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 0.0008257150650024414, "signal/volume_coverage_5/centered_abs_mean": 9.664288569410928e-09, "signal/volume_coverage_5/group_std_mean": 1.2128600435090675e-08, "signal/volume_coverage_5/group_zero_std_frac": 0.675, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 9.664288480593085e-10, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 9.664288480593085e-10, "step": 255 }, { "calibration/aurc": 0.2834820299595159, "calibration/batch_distribution_entropy": 0.969581570766635, "calibration/buffer_distribution_entropy": 0.9996472034772383, "calibration/confidence_entropy": 0.5025546700203597, "calibration/coverage@0%": 0.03203125, "calibration/coverage@1%": 0.03203125, "calibration/coverage@10%": 0.229296875, "calibration/coverage@15%": 0.25625, "calibration/coverage@20%": 0.330859375, "calibration/coverage@25%": 0.4421875, "calibration/coverage@30%": 0.560546875, "calibration/coverage@5%": 0.1625, "calibration/ece": 0.11813969667517914, "calibration/mean_confidence": 0.4632866735346511, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000390625, "completions/max_length": 752.4, "completions/max_terminated_length": 752.4, "completions/mean_length": 225.29521484375, "completions/mean_terminated_length": 225.3841583251953, "completions/min_length": 34.4, "completions/min_terminated_length": 87.6, "epoch": 0.832, "grad_norm": 0.0010744145838543773, "learning_rate": 1e-06, "loss": 0.0003, "num_tokens": 869726091.0, "reward": 0.9356490731239319, "reward_std": 0.08708105832338334, "rewards/accuracy_reward": 0.5587890625, "rewards/brier_reward": 0.8003982663154602, "rewards/confidence_uniqueness_reward": 0.9526532053947449, "rewards/format_reward": 0.999609375, "rewards/frontier_aurc_reward": -0.00265838208142668, "rewards/frontier_ece_reward": 0.0040717648807913065, "rewards/frontier_entropy_batch_reward": -0.20046012997627258, "rewards/volume_coverage_0": 5.32470711966937e-09, "rewards/volume_coverage_1": 5.32470711966937e-09, "rewards/volume_coverage_10": 2.0827092868103135e-07, "rewards/volume_coverage_15": 1.1683033221743243e-06, "rewards/volume_coverage_20": 0.0012323636648943648, "rewards/volume_coverage_25": 0.006934031657874584, "rewards/volume_coverage_5": 7.292380654888575e-09, "signal/accuracy_reward/centered_abs_mean": 0.09027099609375, "signal/accuracy_reward/group_std_mean": 0.12320152074098586, "signal/accuracy_reward/group_zero_std_frac": 0.6375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.045135498046875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.045135498046875, "signal/advantage_abs_mean": 0.06571685001254082, "signal/advantage_pre_scale_abs_mean": 0.06571685001254082, "signal/advantage_pre_scale_std": 0.11133622229099274, "signal/advantage_std": 0.11133622229099274, "signal/brier_reward/centered_abs_mean": 0.12082481384277344, "signal/brier_reward/group_std_mean": 0.15506627261638642, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01208248157054186, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01208248157054186, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.01293247528374195, "signal/confidence_uniqueness_reward/group_std_mean": 0.017343126982450486, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012932475423440338, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012932475423440338, "signal/format_reward/centered_abs_mean": 0.0007568359375, "signal/format_reward/group_std_mean": 0.0022097086533904076, "signal/format_reward/group_zero_std_frac": 0.9875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00037841796875, "signal/frontier_aurc_reward/centered_abs_mean": 0.002934473962523043, "signal/frontier_aurc_reward/group_std_mean": 0.005702351313084364, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.6680924677057194e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.6680924677057194e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.00730133531615138, "signal/frontier_ece_reward/group_std_mean": 0.009728312119841575, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007301335572265089, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007301335572265089, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.274827253818512, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3532982707023621, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02748272567987442, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02748272567987442, "signal/volume_coverage_0/centered_abs_mean": 8.213022972825002e-09, "signal/volume_coverage_0/group_std_mean": 1.061971328653044e-08, "signal/volume_coverage_0/group_zero_std_frac": 0.60625, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 8.213023483527593e-10, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 8.213023483527593e-10, "signal/volume_coverage_1/centered_abs_mean": 8.213022972825002e-09, "signal/volume_coverage_1/group_std_mean": 1.061971328653044e-08, "signal/volume_coverage_1/group_zero_std_frac": 0.60625, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 8.213023483527593e-10, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 8.213023483527593e-10, "signal/volume_coverage_10/centered_abs_mean": 3.5972448699794766e-07, "signal/volume_coverage_10/group_std_mean": 4.666583777179767e-07, "signal/volume_coverage_10/group_zero_std_frac": 0.4875, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 3.597244955244605e-08, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 3.597244955244605e-08, "signal/volume_coverage_15/centered_abs_mean": 2.127487846337317e-06, "signal/volume_coverage_15/group_std_mean": 2.7779381980508334e-06, "signal/volume_coverage_15/group_zero_std_frac": 0.48125, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 2.127487846337317e-07, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 2.127487846337317e-07, "signal/volume_coverage_20/centered_abs_mean": 0.0015772737329825758, "signal/volume_coverage_20/group_std_mean": 0.002045056619681418, "signal/volume_coverage_20/group_zero_std_frac": 0.153125, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 0.00015772737679071725, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 0.00015772737679071725, "signal/volume_coverage_25/centered_abs_mean": 0.008564276993274689, "signal/volume_coverage_25/group_std_mean": 0.010968778282403946, "signal/volume_coverage_25/group_zero_std_frac": 0.0, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.0008564276969991625, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 0.0008564276969991625, "signal/volume_coverage_5/centered_abs_mean": 1.1323550985764541e-08, "signal/volume_coverage_5/group_std_mean": 1.4639518397530083e-08, "signal/volume_coverage_5/group_zero_std_frac": 0.575, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 1.1323551429853751e-09, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 1.1323551429853751e-09, "step": 260 }, { "calibration/aurc": 0.32454142744681747, "calibration/batch_distribution_entropy": 0.9714130609011755, "calibration/buffer_distribution_entropy": 0.999706410918014, "calibration/confidence_entropy": 0.49613739343811575, "calibration/coverage@0%": 0.0078125, "calibration/coverage@1%": 0.0078125, "calibration/coverage@10%": 0.135546875, "calibration/coverage@15%": 0.25078125, "calibration/coverage@20%": 0.398046875, "calibration/coverage@25%": 0.484765625, "calibration/coverage@30%": 0.5375, "calibration/coverage@5%": 0.07578125, "calibration/ece": 0.15647323001271482, "calibration/mean_confidence": 0.5257162202100306, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00087890625, "completions/max_length": 836.2, "completions/max_terminated_length": 836.2, "completions/mean_length": 232.523046875, "completions/mean_terminated_length": 232.72797241210938, "completions/min_length": 0.0, "completions/min_terminated_length": 97.6, "epoch": 0.848, "grad_norm": 0.0009566603694111109, "learning_rate": 1e-06, "loss": -0.0001, "num_tokens": 887121495.0, "reward": 0.9239301204681396, "reward_std": 0.08423375785350799, "rewards/accuracy_reward": 0.53525390625, "rewards/brier_reward": 0.7889814138412475, "rewards/confidence_uniqueness_reward": 0.9528720259666443, "rewards/format_reward": 0.99912109375, "rewards/frontier_aurc_reward": -0.003320692852139473, "rewards/frontier_ece_reward": 0.004457092331722379, "rewards/frontier_entropy_batch_reward": -0.18649887144565583, "rewards/volume_coverage_0": 6.994451406949764e-09, "rewards/volume_coverage_1": 6.994451406949764e-09, "rewards/volume_coverage_10": 2.6992355071087106e-07, "rewards/volume_coverage_15": 1.2051786683286992e-06, "rewards/volume_coverage_20": 0.0009353063651360571, "rewards/volume_coverage_25": 0.007092976756393909, "rewards/volume_coverage_5": 1.2135791505052396e-08, "signal/accuracy_reward/centered_abs_mean": 0.080621337890625, "signal/accuracy_reward/group_std_mean": 0.11271409392356872, "signal/accuracy_reward/group_zero_std_frac": 0.6625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0403106689453125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0403106689453125, "signal/advantage_abs_mean": 0.061996497213840485, "signal/advantage_pre_scale_abs_mean": 0.061996497213840485, "signal/advantage_pre_scale_std": 0.10738707631826401, "signal/advantage_std": 0.10738707631826401, "signal/brier_reward/centered_abs_mean": 0.1269964024424553, "signal/brier_reward/group_std_mean": 0.16419816315174102, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012699640169739724, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012699640169739724, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012903736904263496, "signal/confidence_uniqueness_reward/group_std_mean": 0.018659178167581558, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012903737602755426, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012903737602755426, "signal/format_reward/centered_abs_mean": 0.001690673828125, "signal/format_reward/group_std_mean": 0.004635536018759013, "signal/format_reward/group_zero_std_frac": 0.975, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0008453369140625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0008453369140625, "signal/frontier_aurc_reward/centered_abs_mean": 0.003700101049616933, "signal/frontier_aurc_reward/group_std_mean": 0.0065599772147834304, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.625126384780742e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.625126384780742e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.007679888419806958, "signal/frontier_ece_reward/group_std_mean": 0.010139138251543046, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007679888512939215, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007679888512939215, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.25838564336299896, "signal/frontier_entropy_batch_reward/group_std_mean": 0.33443763256073, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.025838563591241835, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.025838563591241835, "signal/volume_coverage_0/centered_abs_mean": 9.153766811209606e-09, "signal/volume_coverage_0/group_std_mean": 1.1882709927135693e-08, "signal/volume_coverage_0/group_zero_std_frac": 0.6, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 9.153766633573923e-10, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 9.153766633573923e-10, "signal/volume_coverage_1/centered_abs_mean": 9.153766811209606e-09, "signal/volume_coverage_1/group_std_mean": 1.1882709927135693e-08, "signal/volume_coverage_1/group_zero_std_frac": 0.6, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 9.153766633573923e-10, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 9.153766633573923e-10, "signal/volume_coverage_10/centered_abs_mean": 3.123025010154379e-07, "signal/volume_coverage_10/group_std_mean": 4.0348030552195267e-07, "signal/volume_coverage_10/group_zero_std_frac": 0.503125, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 3.1230250030489516e-08, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 3.1230250030489516e-08, "signal/volume_coverage_15/centered_abs_mean": 1.6549604197280133e-06, "signal/volume_coverage_15/group_std_mean": 2.1525930606003386e-06, "signal/volume_coverage_15/group_zero_std_frac": 0.484375, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 1.654960442465381e-07, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 1.654960442465381e-07, "signal/volume_coverage_20/centered_abs_mean": 0.0012428722577169538, "signal/volume_coverage_20/group_std_mean": 0.0016125503461807966, "signal/volume_coverage_20/group_zero_std_frac": 0.1, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 0.00012428723130142316, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 0.00012428723130142316, "signal/volume_coverage_25/centered_abs_mean": 0.008850092254579067, "signal/volume_coverage_25/group_std_mean": 0.011401113867759705, "signal/volume_coverage_25/group_zero_std_frac": 0.0, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.0008850092417560518, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 0.0008850092417560518, "signal/volume_coverage_5/centered_abs_mean": 1.5453646540208865e-08, "signal/volume_coverage_5/group_std_mean": 2.0075406936825858e-08, "signal/volume_coverage_5/group_zero_std_frac": 0.56875, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 1.545364636257318e-09, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 1.545364636257318e-09, "step": 265 }, { "calibration/aurc": 0.2793327046643886, "calibration/batch_distribution_entropy": 0.9744149038280241, "calibration/buffer_distribution_entropy": 0.9996895191216847, "calibration/confidence_entropy": 0.5058490737154272, "calibration/coverage@0%": 0.007038894324853229, "calibration/coverage@1%": 0.007038894324853229, "calibration/coverage@10%": 0.06914979818982388, "calibration/coverage@15%": 0.16645364481409003, "calibration/coverage@20%": 0.2856630687377691, "calibration/coverage@25%": 0.41389585371819965, "calibration/coverage@30%": 0.5526090080724071, "calibration/coverage@5%": 0.015242019324853228, "calibration/ece": 0.1423724300679615, "calibration/mean_confidence": 0.5778247707830445, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.001171875, "completions/max_length": 719.0, "completions/max_terminated_length": 719.0, "completions/mean_length": 234.0359375, "completions/mean_terminated_length": 234.31072998046875, "completions/min_length": 0.0, "completions/min_terminated_length": 101.8, "epoch": 0.864, "grad_norm": 0.0012890915386378765, "learning_rate": 1e-06, "loss": -0.0006, "num_tokens": 904504839.0, "reward": 0.9504047274589539, "reward_std": 0.09197955429553986, "rewards/accuracy_reward": 0.59326171875, "rewards/brier_reward": 0.7776164174079895, "rewards/confidence_uniqueness_reward": 0.9520756244659424, "rewards/format_reward": 0.998828125, "rewards/frontier_aurc_reward": -0.003015703801065683, "rewards/frontier_ece_reward": 0.0034456577152013777, "rewards/frontier_entropy_batch_reward": -0.1951846957206726, "rewards/volume_coverage_0": 5.715915030179986e-09, "rewards/volume_coverage_1": 5.715915030179986e-09, "rewards/volume_coverage_10": 2.172148666712559e-07, "rewards/volume_coverage_15": 1.2539674798972555e-06, "rewards/volume_coverage_20": 0.0006765133934095502, "rewards/volume_coverage_25": 0.0053439770825207235, "rewards/volume_coverage_5": 1.0568708574965058e-08, "signal/accuracy_reward/centered_abs_mean": 0.097515869140625, "signal/accuracy_reward/group_std_mean": 0.12857878357172012, "signal/accuracy_reward/group_zero_std_frac": 0.628125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0487579345703125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0487579345703125, "signal/advantage_abs_mean": 0.07035753056406975, "signal/advantage_pre_scale_abs_mean": 0.07035753056406975, "signal/advantage_pre_scale_std": 0.11794359385967254, "signal/advantage_std": 0.11794359385967254, "signal/brier_reward/centered_abs_mean": 0.13027719110250474, "signal/brier_reward/group_std_mean": 0.16806340515613555, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013027719967067242, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013027719967067242, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.01354727279394865, "signal/confidence_uniqueness_reward/group_std_mean": 0.020269707962870597, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013547273352742194, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013547273352742194, "signal/format_reward/centered_abs_mean": 0.00225830078125, "signal/format_reward/group_std_mean": 0.0062928176019340755, "signal/format_reward/group_zero_std_frac": 0.965625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.001129150390625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.001129150390625, "signal/frontier_aurc_reward/centered_abs_mean": 0.0036219977773725986, "signal/frontier_aurc_reward/group_std_mean": 0.006439856067299843, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.5274974399944765e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.5274974399944765e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.00775745278224349, "signal/frontier_ece_reward/group_std_mean": 0.010124932788312436, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007757453131489456, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007757453131489456, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26516912281513216, "signal/frontier_entropy_batch_reward/group_std_mean": 0.34180880784988404, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026516913250088692, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026516913250088692, "signal/volume_coverage_0/centered_abs_mean": 1.1564463342494945e-08, "signal/volume_coverage_0/group_std_mean": 1.4757365462969574e-08, "signal/volume_coverage_0/group_zero_std_frac": 0.590625, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 1.1564463608948473e-09, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 1.1564463608948473e-09, "signal/volume_coverage_1/centered_abs_mean": 1.1564463342494945e-08, "signal/volume_coverage_1/group_std_mean": 1.4757365462969574e-08, "signal/volume_coverage_1/group_zero_std_frac": 0.590625, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 1.1564463608948473e-09, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 1.1564463608948473e-09, "signal/volume_coverage_10/centered_abs_mean": 3.7779442152441334e-07, "signal/volume_coverage_10/group_std_mean": 4.909618496640177e-07, "signal/volume_coverage_10/group_zero_std_frac": 0.5, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 3.7779441797169966e-08, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 3.7779441797169966e-08, "signal/volume_coverage_15/centered_abs_mean": 2.4811426328597007e-06, "signal/volume_coverage_15/group_std_mean": 3.230066158721456e-06, "signal/volume_coverage_15/group_zero_std_frac": 0.5, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 2.4811426442283845e-07, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 2.4811426442283845e-07, "signal/volume_coverage_20/centered_abs_mean": 0.0015919221099466086, "signal/volume_coverage_20/group_std_mean": 0.002070562425069511, "signal/volume_coverage_20/group_zero_std_frac": 0.15, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 0.00015919221623335035, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 0.00015919221623335035, "signal/volume_coverage_25/centered_abs_mean": 0.009017400071024894, "signal/volume_coverage_25/group_std_mean": 0.011596359312534332, "signal/volume_coverage_25/group_zero_std_frac": 0.0, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.0009017400327138603, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 0.0009017400327138603, "signal/volume_coverage_5/centered_abs_mean": 2.0909239140110002e-08, "signal/volume_coverage_5/group_std_mean": 2.665617309105528e-08, "signal/volume_coverage_5/group_zero_std_frac": 0.553125, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 2.0909240561195476e-09, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 2.0909240561195476e-09, "step": 270 }, { "calibration/aurc": 0.3976527827378292, "calibration/batch_distribution_entropy": 0.9842513601406309, "calibration/buffer_distribution_entropy": 0.9995667312205757, "calibration/confidence_entropy": 0.4861670621360091, "calibration/coverage@0%": 0.01608005359257437, "calibration/coverage@1%": 0.01608005359257437, "calibration/coverage@10%": 0.03489596599515953, "calibration/coverage@15%": 0.05529896639116259, "calibration/coverage@20%": 0.11558296672613119, "calibration/coverage@25%": 0.1461229080681779, "calibration/coverage@30%": 0.2656455270397041, "calibration/coverage@5%": 0.022366890527741358, "calibration/ece": 0.15154470595631533, "calibration/mean_confidence": 0.4808891593691776, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00107421875, "completions/max_length": 953.6, "completions/max_terminated_length": 953.6, "completions/mean_length": 232.0931640625, "completions/mean_terminated_length": 232.34286193847657, "completions/min_length": 0.0, "completions/min_terminated_length": 97.6, "epoch": 0.88, "grad_norm": 0.0011567287147045135, "learning_rate": 1e-06, "loss": -0.0009, "num_tokens": 922028545.0, "reward": 0.9079472064971924, "reward_std": 0.0888668417930603, "rewards/accuracy_reward": 0.50888671875, "rewards/brier_reward": 0.783502197265625, "rewards/confidence_uniqueness_reward": 0.9519115090370178, "rewards/format_reward": 0.99892578125, "rewards/frontier_aurc_reward": -0.003150738077238202, "rewards/frontier_ece_reward": 0.004228654690086842, "rewards/frontier_entropy_batch_reward": -0.20748784244060517, "rewards/volume_coverage_0": 1.1710999814340539e-08, "rewards/volume_coverage_1": 1.1710999814340539e-08, "rewards/volume_coverage_10": 2.072748031878291e-07, "rewards/volume_coverage_15": 1.6568533510508131e-06, "rewards/volume_coverage_20": 0.0014447305584326386, "rewards/volume_coverage_25": 0.0072022792883217335, "rewards/volume_coverage_5": 2.0295714620033322e-08, "signal/accuracy_reward/centered_abs_mean": 0.092388916015625, "signal/accuracy_reward/group_std_mean": 0.12169073075056076, "signal/accuracy_reward/group_zero_std_frac": 0.64375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0461944580078125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0461944580078125, "signal/advantage_abs_mean": 0.06861607134342193, "signal/advantage_pre_scale_abs_mean": 0.06861607134342193, "signal/advantage_pre_scale_std": 0.11503324806690216, "signal/advantage_std": 0.11503324806690216, "signal/brier_reward/centered_abs_mean": 0.12623945921659468, "signal/brier_reward/group_std_mean": 0.16205861270427704, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012623946368694305, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012623946368694305, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.013797298818826676, "signal/confidence_uniqueness_reward/group_std_mean": 0.019599108770489693, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001379729900509119, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001379729900509119, "signal/format_reward/centered_abs_mean": 0.002032470703125, "signal/format_reward/group_std_mean": 0.0050085606053471565, "signal/format_reward/group_zero_std_frac": 0.975, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0010162353515625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0010162353515625, "signal/frontier_aurc_reward/centered_abs_mean": 0.003262630198150873, "signal/frontier_aurc_reward/group_std_mean": 0.005495144985616207, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.0782876749290156e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.0782876749290156e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.007399953808635474, "signal/frontier_ece_reward/group_std_mean": 0.009649076312780381, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007399953901767731, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007399953901767731, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.278250652551651, "signal/frontier_entropy_batch_reward/group_std_mean": 0.353388512134552, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.027825065329670905, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027825065329670905, "signal/volume_coverage_0/centered_abs_mean": 1.2377601166235763e-08, "signal/volume_coverage_0/group_std_mean": 1.5731156643994382e-08, "signal/volume_coverage_0/group_zero_std_frac": 0.584375, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 1.237760094419116e-09, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 1.237760094419116e-09, "signal/volume_coverage_1/centered_abs_mean": 1.2377601166235763e-08, "signal/volume_coverage_1/group_std_mean": 1.5731156643994382e-08, "signal/volume_coverage_1/group_zero_std_frac": 0.584375, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 1.237760094419116e-09, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 1.237760094419116e-09, "signal/volume_coverage_10/centered_abs_mean": 2.3355193263796535e-07, "signal/volume_coverage_10/group_std_mean": 2.960320600209343e-07, "signal/volume_coverage_10/group_zero_std_frac": 0.50625, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 2.3355193512486493e-08, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 2.3355193512486493e-08, "signal/volume_coverage_15/centered_abs_mean": 1.8819132947101025e-06, "signal/volume_coverage_15/group_std_mean": 2.3752340666760575e-06, "signal/volume_coverage_15/group_zero_std_frac": 0.5, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 1.881913277657077e-07, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 1.881913277657077e-07, "signal/volume_coverage_20/centered_abs_mean": 0.001760154077783227, "signal/volume_coverage_20/group_std_mean": 0.0022441008826717735, "signal/volume_coverage_20/group_zero_std_frac": 0.0, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 0.00017601540894247593, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 0.00017601540894247593, "signal/volume_coverage_25/centered_abs_mean": 0.00926213078200817, "signal/volume_coverage_25/group_std_mean": 0.011914961040019989, "signal/volume_coverage_25/group_zero_std_frac": 0.0, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.00092621308285743, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 0.00092621308285743, "signal/volume_coverage_5/centered_abs_mean": 2.1371749170384645e-08, "signal/volume_coverage_5/group_std_mean": 2.7185818396446847e-08, "signal/volume_coverage_5/group_zero_std_frac": 0.58125, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 2.137174881511328e-09, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 2.137174881511328e-09, "step": 275 }, { "calibration/aurc": 0.37601968611273684, "calibration/batch_distribution_entropy": 0.9797260721921596, "calibration/buffer_distribution_entropy": 0.9994793642036346, "calibration/confidence_entropy": 0.5158069074862448, "calibration/coverage@0%": 0.005078125, "calibration/coverage@1%": 0.005078125, "calibration/coverage@10%": 0.046875, "calibration/coverage@15%": 0.085546875, "calibration/coverage@20%": 0.155078125, "calibration/coverage@25%": 0.19921875, "calibration/coverage@30%": 0.2734375, "calibration/coverage@5%": 0.005078125, "calibration/ece": 0.1534649556541345, "calibration/mean_confidence": 0.5264320305031458, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00048828125, "completions/max_length": 600.4, "completions/max_terminated_length": 600.4, "completions/mean_length": 228.57685546875, "completions/mean_terminated_length": 228.68885803222656, "completions/min_length": 39.8, "completions/min_terminated_length": 95.0, "epoch": 0.896, "grad_norm": 0.0013456126907840371, "learning_rate": 1e-06, "loss": -0.0004, "num_tokens": 939480020.0, "reward": 0.9279617309570313, "reward_std": 0.08366731405258179, "rewards/accuracy_reward": 0.54755859375, "rewards/brier_reward": 0.7841137886047364, "rewards/confidence_uniqueness_reward": 0.9525452017784118, "rewards/format_reward": 0.99951171875, "rewards/frontier_aurc_reward": -0.003130245627835393, "rewards/frontier_ece_reward": 0.0031632784754037856, "rewards/frontier_entropy_batch_reward": -0.20224941968917848, "rewards/volume_coverage_0": 7.1152447711497756e-09, "rewards/volume_coverage_1": 7.1152447711497756e-09, "rewards/volume_coverage_10": 7.800173946748146e-08, "rewards/volume_coverage_15": 2.0716353901661934e-06, "rewards/volume_coverage_20": 0.001087343692779541, "rewards/volume_coverage_25": 0.0059946583583951, "rewards/volume_coverage_5": 1.1443985870052131e-08, "signal/accuracy_reward/centered_abs_mean": 0.080853271484375, "signal/accuracy_reward/group_std_mean": 0.11332604438066482, "signal/accuracy_reward/group_zero_std_frac": 0.653125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0404266357421875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0404266357421875, "signal/advantage_abs_mean": 0.062341734766960144, "signal/advantage_pre_scale_abs_mean": 0.062341734766960144, "signal/advantage_pre_scale_std": 0.10656144320964814, "signal/advantage_std": 0.10656144320964814, "signal/brier_reward/centered_abs_mean": 0.11827864497900009, "signal/brier_reward/group_std_mean": 0.15285103023052216, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011827864684164524, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011827864684164524, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.01246865913271904, "signal/confidence_uniqueness_reward/group_std_mean": 0.01718489658087492, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012468658853322268, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012468658853322268, "signal/format_reward/centered_abs_mean": 0.000946044921875, "signal/format_reward/group_std_mean": 0.002762135770171881, "signal/format_reward/group_zero_std_frac": 0.984375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375, "signal/frontier_aurc_reward/centered_abs_mean": 0.002935412712395191, "signal/frontier_aurc_reward/group_std_mean": 0.005062458151951432, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.669265897769947e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.669265897769947e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.00687292842194438, "signal/frontier_ece_reward/group_std_mean": 0.009145662747323513, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006872928468510508, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006872928468510508, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27478896379470824, "signal/frontier_entropy_batch_reward/group_std_mean": 0.34888529777526855, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02747889719903469, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02747889719903469, "signal/volume_coverage_0/centered_abs_mean": 7.998229900607613e-09, "signal/volume_coverage_0/group_std_mean": 1.0271428330099752e-08, "signal/volume_coverage_0/group_zero_std_frac": 0.71875, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 7.998230300287901e-10, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 7.998230300287901e-10, "signal/volume_coverage_1/centered_abs_mean": 7.998229900607613e-09, "signal/volume_coverage_1/group_std_mean": 1.0271428330099752e-08, "signal/volume_coverage_1/group_zero_std_frac": 0.71875, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 7.998230300287901e-10, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 7.998230300287901e-10, "signal/volume_coverage_10/centered_abs_mean": 8.842737599934481e-08, "signal/volume_coverage_10/group_std_mean": 1.134970077032449e-07, "signal/volume_coverage_10/group_zero_std_frac": 0.515625, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 8.842737742043027e-09, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 8.842737742043027e-09, "signal/volume_coverage_15/centered_abs_mean": 2.146668634850357e-06, "signal/volume_coverage_15/group_std_mean": 2.754710089902801e-06, "signal/volume_coverage_15/group_zero_std_frac": 0.4, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 2.1466687201154854e-07, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 2.1466687201154854e-07, "signal/volume_coverage_20/centered_abs_mean": 0.0016437669517472386, "signal/volume_coverage_20/group_std_mean": 0.0021336987148970364, "signal/volume_coverage_20/group_zero_std_frac": 0.0, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 0.00016437669983133675, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 0.00016437669983133675, "signal/volume_coverage_25/centered_abs_mean": 0.008214055374264718, "signal/volume_coverage_25/group_std_mean": 0.010711676627397537, "signal/volume_coverage_25/group_zero_std_frac": 0.0, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.0008214055444113911, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 0.0008214055444113911, "signal/volume_coverage_5/centered_abs_mean": 1.234739954725228e-08, "signal/volume_coverage_5/group_std_mean": 1.5868885405723178e-08, "signal/volume_coverage_5/group_zero_std_frac": 0.6875, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 1.2347399769296885e-09, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 1.2347399769296885e-09, "step": 280 }, { "calibration/aurc": 0.3716300823019802, "calibration/batch_distribution_entropy": 0.982109064948715, "calibration/buffer_distribution_entropy": 0.9992196819951579, "calibration/confidence_entropy": 0.5100215597734005, "calibration/coverage@0%": 0.007042725498271812, "calibration/coverage@1%": 0.007042725498271812, "calibration/coverage@10%": 0.07397031845326203, "calibration/coverage@15%": 0.1361997093534577, "calibration/coverage@20%": 0.20663910514602132, "calibration/coverage@25%": 0.2990031890501309, "calibration/coverage@30%": 0.3921424074943579, "calibration/coverage@5%": 0.01487051414797827, "calibration/ece": 0.1576403585684839, "calibration/mean_confidence": 0.5154867555187955, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.001171875, "completions/max_length": 849.0, "completions/max_terminated_length": 849.0, "completions/mean_length": 229.2068359375, "completions/mean_terminated_length": 229.48021240234374, "completions/min_length": 19.4, "completions/min_terminated_length": 92.6, "epoch": 0.912, "grad_norm": 0.0009245709516108036, "learning_rate": 1e-06, "loss": -0.0008, "num_tokens": 956878394.0, "reward": 0.9328309416770935, "reward_std": 0.08833001106977463, "rewards/accuracy_reward": 0.55703125, "rewards/brier_reward": 0.7842136144638061, "rewards/confidence_uniqueness_reward": 0.9519566416740417, "rewards/format_reward": 0.9986328125, "rewards/frontier_aurc_reward": -0.0029576101573184133, "rewards/frontier_ece_reward": 0.0033359962981194258, "rewards/frontier_entropy_batch_reward": -0.19582011103630065, "rewards/volume_coverage_0": 2.129067899758752e-09, "rewards/volume_coverage_1": 2.129067899758752e-09, "rewards/volume_coverage_10": 3.5658557218098963e-09, "rewards/volume_coverage_15": 1.1135661338812498e-06, "rewards/volume_coverage_20": 0.0007912997010862455, "rewards/volume_coverage_25": 0.005880184099078178, "rewards/volume_coverage_5": 2.129067899758752e-09, "signal/accuracy_reward/centered_abs_mean": 0.0874755859375, "signal/accuracy_reward/group_std_mean": 0.1201816201210022, "signal/accuracy_reward/group_zero_std_frac": 0.640625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04373779296875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04373779296875, "signal/advantage_abs_mean": 0.06516797170042991, "signal/advantage_pre_scale_abs_mean": 0.06516797170042991, "signal/advantage_pre_scale_std": 0.11177586168050765, "signal/advantage_std": 0.11177586168050765, "signal/brier_reward/centered_abs_mean": 0.12937138825654984, "signal/brier_reward/group_std_mean": 0.1675568252801895, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012937139347195625, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012937139347195625, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.01360544990748167, "signal/confidence_uniqueness_reward/group_std_mean": 0.020998037606477737, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013605450280010701, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013605450280010701, "signal/format_reward/centered_abs_mean": 0.00263671875, "signal/format_reward/group_std_mean": 0.007397671788930893, "signal/format_reward/group_zero_std_frac": 0.959375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.001318359375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.001318359375, "signal/frontier_aurc_reward/centered_abs_mean": 0.003030157322064042, "signal/frontier_aurc_reward/group_std_mean": 0.005202419683337211, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.787696623476222e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.787696623476222e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.007031449675559997, "signal/frontier_ece_reward/group_std_mean": 0.009188699722290038, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007031450048089027, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007031450048089027, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2668339848518372, "signal/frontier_entropy_batch_reward/group_std_mean": 0.34241083860397337, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026683398336172105, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026683398336172105, "signal/volume_coverage_0/centered_abs_mean": 4.3604963195775784e-09, "signal/volume_coverage_0/group_std_mean": 5.642320566323633e-09, "signal/volume_coverage_0/group_zero_std_frac": 0.784375, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 4.36049640839542e-10, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 4.36049640839542e-10, "signal/volume_coverage_1/centered_abs_mean": 4.3604963195775784e-09, "signal/volume_coverage_1/group_std_mean": 5.642320566323633e-09, "signal/volume_coverage_1/group_zero_std_frac": 0.784375, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 4.36049640839542e-10, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 4.36049640839542e-10, "signal/volume_coverage_10/centered_abs_mean": 7.591896622471949e-09, "signal/volume_coverage_10/group_std_mean": 9.819433088864572e-09, "signal/volume_coverage_10/group_zero_std_frac": 0.73125, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 7.591896444836266e-10, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 7.591896444836266e-10, "signal/volume_coverage_15/centered_abs_mean": 2.0973064010831876e-06, "signal/volume_coverage_15/group_std_mean": 2.675126233953051e-06, "signal/volume_coverage_15/group_zero_std_frac": 0.05, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 2.0973064067675297e-07, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 2.0973064067675297e-07, "signal/volume_coverage_20/centered_abs_mean": 0.001663878746330738, "signal/volume_coverage_20/group_std_mean": 0.002154796291142702, "signal/volume_coverage_20/group_zero_std_frac": 0.0, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 0.00016638787637930365, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 0.00016638787637930365, "signal/volume_coverage_25/centered_abs_mean": 0.009200803562998771, "signal/volume_coverage_25/group_std_mean": 0.011962664313614368, "signal/volume_coverage_25/group_zero_std_frac": 0.0, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.000920080381911248, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 0.000920080381911248, "signal/volume_coverage_5/centered_abs_mean": 4.3604963195775784e-09, "signal/volume_coverage_5/group_std_mean": 5.642320566323633e-09, "signal/volume_coverage_5/group_zero_std_frac": 0.784375, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 4.36049640839542e-10, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 4.36049640839542e-10, "step": 285 }, { "calibration/aurc": 0.4040774793635074, "calibration/batch_distribution_entropy": 0.9859097293318687, "calibration/buffer_distribution_entropy": 0.9991986464845336, "calibration/confidence_entropy": 0.5072232364333589, "calibration/coverage@0%": 0.00546875, "calibration/coverage@1%": 0.00546875, "calibration/coverage@10%": 0.00546875, "calibration/coverage@15%": 0.005859375, "calibration/coverage@20%": 0.016015625, "calibration/coverage@25%": 0.036328125, "calibration/coverage@30%": 0.231640625, "calibration/coverage@5%": 0.00546875, "calibration/ece": 0.1370660257229794, "calibration/mean_confidence": 0.5252956247397285, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000390625, "completions/max_length": 644.0, "completions/max_terminated_length": 644.0, "completions/mean_length": 225.1185546875, "completions/mean_terminated_length": 225.20532836914063, "completions/min_length": 66.0, "completions/min_terminated_length": 101.8, "epoch": 0.928, "grad_norm": 0.0007230278570204973, "learning_rate": 1e-06, "loss": -0.0002, "num_tokens": 974210424.0, "reward": 0.9181464910507202, "reward_std": 0.08190946877002717, "rewards/accuracy_reward": 0.52734375, "rewards/brier_reward": 0.7798070788383484, "rewards/confidence_uniqueness_reward": 0.9533717632293701, "rewards/format_reward": 0.999609375, "rewards/frontier_aurc_reward": -0.0034818340092897413, "rewards/frontier_ece_reward": 0.003747813869267702, "rewards/frontier_entropy_batch_reward": -0.19755975306034088, "rewards/volume_coverage_0": 2.919360059250309e-09, "rewards/volume_coverage_1": 2.919360059250309e-09, "rewards/volume_coverage_10": 3.7500787586353114e-09, "rewards/volume_coverage_15": 1.1286248764008633e-06, "rewards/volume_coverage_20": 0.000972931594151305, "rewards/volume_coverage_25": 0.0067935499362647535, "rewards/volume_coverage_5": 2.919360059250309e-09, "signal/accuracy_reward/centered_abs_mean": 0.081298828125, "signal/accuracy_reward/group_std_mean": 0.10915876477956772, "signal/accuracy_reward/group_zero_std_frac": 0.68125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0406494140625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0406494140625, "signal/advantage_abs_mean": 0.06298240423202514, "signal/advantage_pre_scale_abs_mean": 0.06298240423202514, "signal/advantage_pre_scale_std": 0.10488016307353973, "signal/advantage_std": 0.10488016307353973, "signal/brier_reward/centered_abs_mean": 0.12703848332166673, "signal/brier_reward/group_std_mean": 0.16295638978481292, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012703849002718925, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012703849002718925, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.011908646300435066, "signal/confidence_uniqueness_reward/group_std_mean": 0.015822481364011765, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0011908646440133453, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0011908646440133453, "signal/format_reward/centered_abs_mean": 0.00074462890625, "signal/format_reward/group_std_mean": 0.0018734002020210027, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000372314453125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000372314453125, "signal/frontier_aurc_reward/centered_abs_mean": 0.0034886215813457965, "signal/frontier_aurc_reward/group_std_mean": 0.005800313968211413, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.360777020337991e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.360777020337991e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.0073123440146446225, "signal/frontier_ece_reward/group_std_mean": 0.009510249830782413, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007312344270758331, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007312344270758331, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26927927136421204, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3435124158859253, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026927927508950233, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026927927508950233, "signal/volume_coverage_0/centered_abs_mean": 4.7993638396803816e-09, "signal/volume_coverage_0/group_std_mean": 6.0747774810465674e-09, "signal/volume_coverage_0/group_zero_std_frac": 0.79375, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 4.799363739760309e-10, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 4.799363739760309e-10, "signal/volume_coverage_1/centered_abs_mean": 4.7993638396803816e-09, "signal/volume_coverage_1/group_std_mean": 6.0747774810465674e-09, "signal/volume_coverage_1/group_zero_std_frac": 0.79375, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 4.799363739760309e-10, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 4.799363739760309e-10, "signal/volume_coverage_10/centered_abs_mean": 2.0036284098523537e-08, "signal/volume_coverage_10/group_std_mean": 2.5118979429805678e-08, "signal/volume_coverage_10/group_zero_std_frac": 0.703125, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 2.0036283521207566e-09, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 2.0036283521207566e-09, "signal/volume_coverage_15/centered_abs_mean": 2.117918847943656e-06, "signal/volume_coverage_15/group_std_mean": 2.70749264927872e-06, "signal/volume_coverage_15/group_zero_std_frac": 0.01875, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 2.1179188820497074e-07, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 2.1179188820497074e-07, "signal/volume_coverage_20/centered_abs_mean": 0.001639655651524663, "signal/volume_coverage_20/group_std_mean": 0.0021060988772660496, "signal/volume_coverage_20/group_zero_std_frac": 0.0, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 0.00016396556748077273, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 0.00016396556748077273, "signal/volume_coverage_25/centered_abs_mean": 0.009390851110219955, "signal/volume_coverage_25/group_std_mean": 0.012028184160590172, "signal/volume_coverage_25/group_zero_std_frac": 0.0, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.0009390851017087698, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 0.0009390851017087698, "signal/volume_coverage_5/centered_abs_mean": 4.7993638396803816e-09, "signal/volume_coverage_5/group_std_mean": 6.0747774810465674e-09, "signal/volume_coverage_5/group_zero_std_frac": 0.79375, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 4.799363739760309e-10, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 4.799363739760309e-10, "step": 290 }, { "calibration/aurc": 0.2707380283968821, "calibration/batch_distribution_entropy": 0.9904177652775248, "calibration/buffer_distribution_entropy": 0.9992103669769856, "calibration/confidence_entropy": 0.5174462415283381, "calibration/coverage@0%": 0.035546875, "calibration/coverage@1%": 0.055078125, "calibration/coverage@10%": 0.1703125, "calibration/coverage@15%": 0.276171875, "calibration/coverage@20%": 0.37265625, "calibration/coverage@25%": 0.453515625, "calibration/coverage@30%": 0.575390625, "calibration/coverage@5%": 0.10078125, "calibration/ece": 0.08772153547236908, "calibration/mean_confidence": 0.4955904576824491, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0005859375, "completions/max_length": 545.2, "completions/max_terminated_length": 545.2, "completions/mean_length": 227.26142578125, "completions/mean_terminated_length": 227.3970184326172, "completions/min_length": 37.4, "completions/min_terminated_length": 99.4, "epoch": 0.944, "grad_norm": 0.0008903025300242007, "learning_rate": 1e-06, "loss": 0.0003, "num_tokens": 991513005.0, "reward": 0.9210011959075928, "reward_std": 0.09348525255918502, "rewards/accuracy_reward": 0.53515625, "rewards/brier_reward": 0.7760526895523071, "rewards/confidence_uniqueness_reward": 0.9528594970703125, "rewards/format_reward": 0.99931640625, "rewards/frontier_aurc_reward": -0.003286689938977361, "rewards/frontier_ece_reward": 0.0035630079917609693, "rewards/frontier_entropy_batch_reward": -0.2010483294725418, "rewards/volume_coverage_0": 9.560014141429817e-10, "rewards/volume_coverage_1": 9.560014141429817e-10, "rewards/volume_coverage_10": 1.3513594021574705e-09, "rewards/volume_coverage_15": 1.10043815766403e-06, "rewards/volume_coverage_20": 0.0008068614755757153, "rewards/volume_coverage_25": 0.005824728962033987, "rewards/volume_coverage_5": 9.560014141429817e-10, "signal/accuracy_reward/centered_abs_mean": 0.10322265625, "signal/accuracy_reward/group_std_mean": 0.1366787388920784, "signal/accuracy_reward/group_zero_std_frac": 0.60625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.051611328125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.051611328125, "signal/advantage_abs_mean": 0.07175759673118591, "signal/advantage_pre_scale_abs_mean": 0.07175759673118591, "signal/advantage_pre_scale_std": 0.11863639056682587, "signal/advantage_std": 0.11863639056682587, "signal/brier_reward/centered_abs_mean": 0.12774860262870788, "signal/brier_reward/group_std_mean": 0.16281991302967072, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012774860113859176, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012774860113859176, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012914423458278179, "signal/confidence_uniqueness_reward/group_std_mean": 0.01821254752576351, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012914423597976566, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012914423597976566, "signal/format_reward/centered_abs_mean": 0.001324462890625, "signal/format_reward/group_std_mean": 0.0038669900968670845, "signal/format_reward/group_zero_std_frac": 0.978125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006622314453125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0006622314453125, "signal/frontier_aurc_reward/centered_abs_mean": 0.0031198048498481514, "signal/frontier_aurc_reward/group_std_mean": 0.005187831167131662, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.899756156897638e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.899756156897638e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.007306762877851725, "signal/frontier_ece_reward/group_std_mean": 0.009607454948127269, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007306763087399304, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007306763087399304, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2817619800567627, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3556141793727875, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.028176198527216912, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.028176198527216912, "signal/volume_coverage_0/centered_abs_mean": 4.504470840771546e-09, "signal/volume_coverage_0/group_std_mean": 5.734404773249935e-09, "signal/volume_coverage_0/group_zero_std_frac": 0.796875, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 4.5044708518737763e-10, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 4.5044708518737763e-10, "signal/volume_coverage_1/centered_abs_mean": 4.504470840771546e-09, "signal/volume_coverage_1/group_std_mean": 5.734404773249935e-09, "signal/volume_coverage_1/group_zero_std_frac": 0.796875, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 4.5044708518737763e-10, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 4.5044708518737763e-10, "signal/volume_coverage_10/centered_abs_mean": 8.61983622257867e-09, "signal/volume_coverage_10/group_std_mean": 1.0975744579866387e-08, "signal/volume_coverage_10/group_zero_std_frac": 0.765625, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 8.619836289192051e-10, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 8.619836289192051e-10, "signal/volume_coverage_15/centered_abs_mean": 2.301765857737337e-06, "signal/volume_coverage_15/group_std_mean": 2.931733843070106e-06, "signal/volume_coverage_15/group_zero_std_frac": 0.203125, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 2.3017659884772e-07, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 2.3017659884772e-07, "signal/volume_coverage_20/centered_abs_mean": 0.001643260568380356, "signal/volume_coverage_20/group_std_mean": 0.0021156548289582135, "signal/volume_coverage_20/group_zero_std_frac": 0.0, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 0.0001643260649871081, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 0.0001643260649871081, "signal/volume_coverage_25/centered_abs_mean": 0.009385128132998943, "signal/volume_coverage_25/group_std_mean": 0.01200024802237749, "signal/volume_coverage_25/group_zero_std_frac": 0.0, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.0009385128272697329, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 0.0009385128272697329, "signal/volume_coverage_5/centered_abs_mean": 4.504470840771546e-09, "signal/volume_coverage_5/group_std_mean": 5.734404773249935e-09, "signal/volume_coverage_5/group_zero_std_frac": 0.796875, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 4.5044708518737763e-10, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 4.5044708518737763e-10, "step": 295 }, { "calibration/aurc": 0.3539351773524442, "calibration/batch_distribution_entropy": 0.9919915695023717, "calibration/buffer_distribution_entropy": 0.9992684340977069, "calibration/confidence_entropy": 0.5006099573341842, "calibration/coverage@0%": 0.00703125, "calibration/coverage@1%": 0.00703125, "calibration/coverage@10%": 0.0296875, "calibration/coverage@15%": 0.111328125, "calibration/coverage@20%": 0.241796875, "calibration/coverage@25%": 0.295703125, "calibration/coverage@30%": 0.346484375, "calibration/coverage@5%": 0.012890625, "calibration/ece": 0.13862321886817325, "calibration/mean_confidence": 0.5191145025524813, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000390625, "completions/max_length": 659.0, "completions/max_terminated_length": 659.0, "completions/mean_length": 226.6712890625, "completions/mean_terminated_length": 226.7602111816406, "completions/min_length": 21.4, "completions/min_terminated_length": 102.2, "epoch": 0.96, "grad_norm": 0.0007685109740123153, "learning_rate": 1e-06, "loss": -0.0001, "num_tokens": 1008774439.0, "reward": 0.9186918616294861, "reward_std": 0.07693372666835785, "rewards/accuracy_reward": 0.52412109375, "rewards/brier_reward": 0.7954145908355713, "rewards/confidence_uniqueness_reward": 0.9527331829071045, "rewards/format_reward": 0.999609375, "rewards/frontier_aurc_reward": -0.0033720153383910655, "rewards/frontier_ece_reward": 0.004499087203294038, "rewards/frontier_entropy_batch_reward": -0.19327735304832458, "rewards/volume_coverage_0": 1.9510296711544584e-09, "rewards/volume_coverage_1": 1.9510296711544584e-09, "rewards/volume_coverage_10": 1.2793440679059741e-08, "rewards/volume_coverage_15": 1.9328432472320856e-06, "rewards/volume_coverage_20": 0.0012584096053615212, "rewards/volume_coverage_25": 0.008058086410164833, "rewards/volume_coverage_5": 1.9510296711544584e-09, "signal/accuracy_reward/centered_abs_mean": 0.071441650390625, "signal/accuracy_reward/group_std_mean": 0.09916180819272995, "signal/accuracy_reward/group_zero_std_frac": 0.7, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0357208251953125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0357208251953125, "signal/advantage_abs_mean": 0.05721310302615166, "signal/advantage_pre_scale_abs_mean": 0.05721310302615166, "signal/advantage_pre_scale_std": 0.10052263587713242, "signal/advantage_std": 0.10052263587713242, "signal/brier_reward/centered_abs_mean": 0.11715176105499267, "signal/brier_reward/group_std_mean": 0.152744123339653, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011715176329016686, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011715176329016686, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012735397927463055, "signal/confidence_uniqueness_reward/group_std_mean": 0.017090072110295295, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012735398719087242, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012735398719087242, "signal/format_reward/centered_abs_mean": 0.0007568359375, "signal/format_reward/group_std_mean": 0.0022097086533904076, "signal/format_reward/group_zero_std_frac": 0.9875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00037841796875, "signal/frontier_aurc_reward/centered_abs_mean": 0.0031671120319515466, "signal/frontier_aurc_reward/group_std_mean": 0.00535587165504694, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.958890083595179e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.958890083595179e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.007282440643757581, "signal/frontier_ece_reward/group_std_mean": 0.009441747888922691, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007282441016286612, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007282441016286612, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2648331612348557, "signal/frontier_entropy_batch_reward/group_std_mean": 0.33835762143135073, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026483316719532014, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026483316719532014, "signal/volume_coverage_0/centered_abs_mean": 3.5475106408000555e-09, "signal/volume_coverage_0/group_std_mean": 4.6579573975691345e-09, "signal/volume_coverage_0/group_zero_std_frac": 0.840625, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 3.547510674106746e-10, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 3.547510674106746e-10, "signal/volume_coverage_1/centered_abs_mean": 3.5475106408000555e-09, "signal/volume_coverage_1/group_std_mean": 4.6579573975691345e-09, "signal/volume_coverage_1/group_zero_std_frac": 0.840625, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 3.547510674106746e-10, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 3.547510674106746e-10, "signal/volume_coverage_10/centered_abs_mean": 2.4543501098150956e-08, "signal/volume_coverage_10/group_std_mean": 3.1358744401188686e-08, "signal/volume_coverage_10/group_zero_std_frac": 0.721875, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 2.4543501453422324e-09, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 2.4543501453422324e-09, "signal/volume_coverage_15/centered_abs_mean": 2.320456633242429e-06, "signal/volume_coverage_15/group_std_mean": 2.9677721840926096e-06, "signal/volume_coverage_15/group_zero_std_frac": 0.1, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 2.3204566730328223e-07, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 2.3204566730328223e-07, "signal/volume_coverage_20/centered_abs_mean": 0.001473587960936129, "signal/volume_coverage_20/group_std_mean": 0.0019304967951029538, "signal/volume_coverage_20/group_zero_std_frac": 0.0, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 0.00014735879667568952, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 0.00014735879667568952, "signal/volume_coverage_25/centered_abs_mean": 0.008963228948414326, "signal/volume_coverage_25/group_std_mean": 0.011512291803956031, "signal/volume_coverage_25/group_zero_std_frac": 0.0, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.0008963228901848197, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 0.0008963228901848197, "signal/volume_coverage_5/centered_abs_mean": 3.5475106408000555e-09, "signal/volume_coverage_5/group_std_mean": 4.6579573975691345e-09, "signal/volume_coverage_5/group_zero_std_frac": 0.840625, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 3.547510674106746e-10, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 3.547510674106746e-10, "step": 300 }, { "epoch": 0.96, "eval_calibration/aurc": 0.469143428109443, "eval_calibration/batch_distribution_entropy": 0.9061692855550176, "eval_calibration/buffer_distribution_entropy": 0.9992889529921487, "eval_calibration/confidence_entropy": 0.5129001107673137, "eval_calibration/coverage@0%": 0.0625, "eval_calibration/coverage@1%": 0.0625, "eval_calibration/coverage@10%": 0.0625, "eval_calibration/coverage@15%": 0.0703125, "eval_calibration/coverage@20%": 0.0859375, "eval_calibration/coverage@25%": 0.1015625, "eval_calibration/coverage@30%": 0.2109375, "eval_calibration/coverage@5%": 0.0625, "eval_calibration/ece": 0.19129152092641719, "eval_calibration/mean_confidence": 0.47514103987281153, "eval_completions/clipped_ratio": 0.002155172413793094, "eval_completions/max_length": 440.5, "eval_completions/max_terminated_length": 440.5, "eval_completions/mean_length": 227.2459602355957, "eval_completions/mean_terminated_length": 227.75753784179688, "eval_completions/min_length": 82.5, "eval_completions/min_terminated_length": 118.0, "eval_loss": 0.0, "eval_num_tokens": 1008774439.0, "eval_reward": 0.7893799394369125, "eval_reward_std": 0.2509300038218498, "eval_rewards/accuracy_reward": 0.443359375, "eval_rewards/brier_reward": 0.7772131115198135, "eval_rewards/confidence_uniqueness_reward": 0.8947123885154724, "eval_rewards/format_reward": 0.998046875, "eval_rewards/frontier_aurc_reward": -0.004324580018874258, "eval_rewards/frontier_ece_reward": 0.004353664699010551, "eval_rewards/frontier_entropy_batch_reward": -0.998046875, "eval_rewards/volume_coverage_0": 4.074710624468736e-09, "eval_rewards/volume_coverage_1": 4.074710624468736e-09, "eval_rewards/volume_coverage_10": 7.91095577934442e-09, "eval_rewards/volume_coverage_15": 9.340307514094093e-07, "eval_rewards/volume_coverage_20": 0.0014538196846842766, "eval_rewards/volume_coverage_25": 0.007621690630912781, "eval_rewards/volume_coverage_5": 4.074710624468736e-09, "eval_runtime": 30.3472, "eval_samples_per_second": 16.476, "eval_signal/accuracy_reward/centered_abs_mean": 0.4776611328125, "eval_signal/accuracy_reward/group_std_mean": 0.49625900387763977, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23883056640625, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23883056640625, "eval_signal/advantage_abs_mean": 0.23880807682871819, "eval_signal/advantage_pre_scale_abs_mean": 0.23880807682871819, "eval_signal/advantage_pre_scale_std": 0.2480723336338997, "eval_signal/advantage_std": 0.2480723336338997, "eval_signal/brier_reward/centered_abs_mean": 0.1952587403357029, "eval_signal/brier_reward/group_std_mean": 0.2449491173028946, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019525874871760607, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.019525874871760607, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.04094819072633982, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.05267652776092291, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004094819072633982, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004094819072633982, "eval_signal/format_reward/centered_abs_mean": 0.0037841796875, "eval_signal/format_reward/group_std_mean": 0.011048543266952038, "eval_signal/format_reward/group_zero_std_frac": 0.9375, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.00189208984375, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.00189208984375, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.005890812375582755, "eval_signal/frontier_aurc_reward/group_std_mean": 0.012180331395938993, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 7.363515578617807e-05, "eval_signal/frontier_aurc_reward/weight": 0.012500000186264515, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 7.363515578617807e-05, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.009545863838866353, "eval_signal/frontier_ece_reward/group_std_mean": 0.012291400227695704, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009545864013489336, "eval_signal/frontier_ece_reward/weight": 0.10000000149011612, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009545864013489336, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0037841796875, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.011048543266952038, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.9375, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0003784179862122983, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0003784179862122983, "eval_signal/volume_coverage_0/centered_abs_mean": 1.0248968562720506e-08, "eval_signal/volume_coverage_0/group_std_mean": 1.2958092820980482e-08, "eval_signal/volume_coverage_0/group_zero_std_frac": 0.5625, "eval_signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 1.0248968618231658e-09, "eval_signal/volume_coverage_0/weight": 0.10000000149011612, "eval_signal/volume_coverage_0/weighted_centered_abs_mean": 1.0248968618231658e-09, "eval_signal/volume_coverage_1/centered_abs_mean": 1.0248968562720506e-08, "eval_signal/volume_coverage_1/group_std_mean": 1.2958092820980482e-08, "eval_signal/volume_coverage_1/group_zero_std_frac": 0.5625, "eval_signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 1.0248968618231658e-09, "eval_signal/volume_coverage_1/weight": 0.10000000149011612, "eval_signal/volume_coverage_1/weighted_centered_abs_mean": 1.0248968618231658e-09, "eval_signal/volume_coverage_10/centered_abs_mean": 2.0943740519641096e-08, "eval_signal/volume_coverage_10/group_std_mean": 2.6666832741994995e-08, "eval_signal/volume_coverage_10/group_zero_std_frac": 0.5625, "eval_signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 2.0943741740886423e-09, "eval_signal/volume_coverage_10/weight": 0.10000000149011612, "eval_signal/volume_coverage_10/weighted_centered_abs_mean": 2.0943741740886423e-09, "eval_signal/volume_coverage_15/centered_abs_mean": 2.8810036951654183e-06, "eval_signal/volume_coverage_15/group_std_mean": 3.6288600995249e-06, "eval_signal/volume_coverage_15/group_zero_std_frac": 0.25, "eval_signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 2.881003808852256e-07, "eval_signal/volume_coverage_15/weight": 0.10000000149011612, "eval_signal/volume_coverage_15/weighted_centered_abs_mean": 2.881003808852256e-07, "eval_signal/volume_coverage_20/centered_abs_mean": 0.003215643868315965, "eval_signal/volume_coverage_20/group_std_mean": 0.004141232697293162, "eval_signal/volume_coverage_20/group_zero_std_frac": 0.0, "eval_signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 0.00032156440283870324, "eval_signal/volume_coverage_20/weight": 0.10000000149011612, "eval_signal/volume_coverage_20/weighted_centered_abs_mean": 0.00032156440283870324, "eval_signal/volume_coverage_25/centered_abs_mean": 0.01439478388056159, "eval_signal/volume_coverage_25/group_std_mean": 0.018871094100177288, "eval_signal/volume_coverage_25/group_zero_std_frac": 0.0, "eval_signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.0014394783938769251, "eval_signal/volume_coverage_25/weight": 0.10000000149011612, "eval_signal/volume_coverage_25/weighted_centered_abs_mean": 0.0014394783938769251, "eval_signal/volume_coverage_5/centered_abs_mean": 1.0248968562720506e-08, "eval_signal/volume_coverage_5/group_std_mean": 1.2958092820980482e-08, "eval_signal/volume_coverage_5/group_zero_std_frac": 0.5625, "eval_signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 1.0248968618231658e-09, "eval_signal/volume_coverage_5/weight": 0.10000000149011612, "eval_signal/volume_coverage_5/weighted_centered_abs_mean": 1.0248968618231658e-09, "eval_steps_per_second": 0.132, "step": 300 }, { "calibration/aurc": 0.27828423584262174, "calibration/batch_distribution_entropy": 0.9783725227243985, "calibration/buffer_distribution_entropy": 0.9993470822495265, "calibration/confidence_entropy": 0.5214416379067564, "calibration/coverage@0%": 0.014858274217221135, "calibration/coverage@1%": 0.014858274217221135, "calibration/coverage@10%": 0.18521740459882582, "calibration/coverage@15%": 0.32198737157534246, "calibration/coverage@20%": 0.4349055161448141, "calibration/coverage@25%": 0.49781525195694715, "calibration/coverage@30%": 0.5665874204990216, "calibration/coverage@5%": 0.0633011252446184, "calibration/ece": 0.13189558718940492, "calibration/mean_confidence": 0.5073180815615211, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00107421875, "completions/max_length": 697.6, "completions/max_terminated_length": 697.6, "completions/mean_length": 227.09482421875, "completions/mean_terminated_length": 227.33677673339844, "completions/min_length": 18.8, "completions/min_terminated_length": 99.0, "epoch": 0.976, "grad_norm": 0.001216869568452239, "learning_rate": 1e-06, "loss": -0.0001, "num_tokens": 1025961010.0, "reward": 0.9318997025489807, "reward_std": 0.08647259920835496, "rewards/accuracy_reward": 0.5517578125, "rewards/brier_reward": 0.7864073157310486, "rewards/confidence_uniqueness_reward": 0.952651071548462, "rewards/format_reward": 0.9986328125, "rewards/frontier_aurc_reward": -0.0029323163442313673, "rewards/frontier_ece_reward": 0.0035886369296349585, "rewards/frontier_entropy_batch_reward": -0.18277476131916046, "rewards/volume_coverage_0": 9.206329920630196e-10, "rewards/volume_coverage_1": 9.206329920630196e-10, "rewards/volume_coverage_10": 4.901805539248017e-09, "rewards/volume_coverage_15": 1.2830848845624132e-06, "rewards/volume_coverage_20": 0.0008759050746448338, "rewards/volume_coverage_25": 0.00666112988255918, "rewards/volume_coverage_5": 9.206329920630196e-10, "signal/accuracy_reward/centered_abs_mean": 0.09208984375, "signal/accuracy_reward/group_std_mean": 0.1234636165201664, "signal/accuracy_reward/group_zero_std_frac": 0.6375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.046044921875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.046044921875, "signal/advantage_abs_mean": 0.06493477523326874, "signal/advantage_pre_scale_abs_mean": 0.06493477523326874, "signal/advantage_pre_scale_std": 0.11201283037662506, "signal/advantage_std": 0.11201283037662506, "signal/brier_reward/centered_abs_mean": 0.12516404688358307, "signal/brier_reward/group_std_mean": 0.162343767285347, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012516404874622822, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012516404874622822, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.013118837960064411, "signal/confidence_uniqueness_reward/group_std_mean": 0.01964471973478794, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013118838891386987, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013118838891386987, "signal/format_reward/centered_abs_mean": 0.00257568359375, "signal/format_reward/group_std_mean": 0.006574305240064859, "signal/format_reward/group_zero_std_frac": 0.965625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.001287841796875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.001287841796875, "signal/frontier_aurc_reward/centered_abs_mean": 0.0027899232693016527, "signal/frontier_aurc_reward/group_std_mean": 0.0048869956284761425, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.4874041375587694e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.4874041375587694e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.0070959897711873055, "signal/frontier_ece_reward/group_std_mean": 0.009288905560970307, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007095989771187305, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007095989771187305, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.24910698533058168, "signal/frontier_entropy_batch_reward/group_std_mean": 0.32469738125801084, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02491069883108139, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02491069883108139, "signal/volume_coverage_0/centered_abs_mean": 5.720340201520457e-09, "signal/volume_coverage_0/group_std_mean": 7.311743388527248e-09, "signal/volume_coverage_0/group_zero_std_frac": 0.79375, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 5.72034031254276e-10, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 5.72034031254276e-10, "signal/volume_coverage_1/centered_abs_mean": 5.720340201520457e-09, "signal/volume_coverage_1/group_std_mean": 7.311743388527248e-09, "signal/volume_coverage_1/group_zero_std_frac": 0.79375, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 5.72034031254276e-10, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 5.72034031254276e-10, "signal/volume_coverage_10/centered_abs_mean": 1.532603484122319e-08, "signal/volume_coverage_10/group_std_mean": 1.9649526628029434e-08, "signal/volume_coverage_10/group_zero_std_frac": 0.64375, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 1.5326034663587507e-09, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 1.5326034663587507e-09, "signal/volume_coverage_15/centered_abs_mean": 1.4135470792098203e-06, "signal/volume_coverage_15/group_std_mean": 1.8063871948470479e-06, "signal/volume_coverage_15/group_zero_std_frac": 0.059375, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 1.4135470536302818e-07, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 1.4135470536302818e-07, "signal/volume_coverage_20/centered_abs_mean": 0.001932887057773769, "signal/volume_coverage_20/group_std_mean": 0.0025168229360133408, "signal/volume_coverage_20/group_zero_std_frac": 0.0, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 0.00019328870403114706, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 0.00019328870403114706, "signal/volume_coverage_25/centered_abs_mean": 0.009515535458922387, "signal/volume_coverage_25/group_std_mean": 0.012358383275568485, "signal/volume_coverage_25/group_zero_std_frac": 0.0, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.0009515536017715931, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 0.0009515536017715931, "signal/volume_coverage_5/centered_abs_mean": 5.720340201520457e-09, "signal/volume_coverage_5/group_std_mean": 7.311743388527248e-09, "signal/volume_coverage_5/group_zero_std_frac": 0.79375, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 5.72034031254276e-10, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 5.72034031254276e-10, "step": 305 }, { "calibration/aurc": 0.3711929587850343, "calibration/batch_distribution_entropy": 0.9894443021341646, "calibration/buffer_distribution_entropy": 0.9993375714297741, "calibration/confidence_entropy": 0.510071766819145, "calibration/coverage@0%": 0.01015625, "calibration/coverage@1%": 0.01015625, "calibration/coverage@10%": 0.059375, "calibration/coverage@15%": 0.087109375, "calibration/coverage@20%": 0.11171875, "calibration/coverage@25%": 0.161328125, "calibration/coverage@30%": 0.323828125, "calibration/coverage@5%": 0.0140625, "calibration/ece": 0.14348603278692526, "calibration/mean_confidence": 0.4779751045544707, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00048828125, "completions/max_length": 504.2, "completions/max_terminated_length": 504.2, "completions/mean_length": 219.91044921875, "completions/mean_terminated_length": 220.01998596191407, "completions/min_length": 58.8, "completions/min_terminated_length": 98.2, "epoch": 0.992, "grad_norm": 0.0009316056966781616, "learning_rate": 1e-06, "loss": -0.0005, "num_tokens": 1043341373.0, "reward": 0.92005295753479, "reward_std": 0.08265489488840103, "rewards/accuracy_reward": 0.53046875, "rewards/brier_reward": 0.7892962336540222, "rewards/confidence_uniqueness_reward": 0.9527257204055786, "rewards/format_reward": 0.99951171875, "rewards/frontier_aurc_reward": -0.0031668921466916798, "rewards/frontier_ece_reward": 0.0035965410992503167, "rewards/frontier_entropy_batch_reward": -0.20363328158855437, "rewards/volume_coverage_0": 2.720843217396407e-09, "rewards/volume_coverage_1": 2.720843217396407e-09, "rewards/volume_coverage_10": 1.035092784817948e-08, "rewards/volume_coverage_15": 1.4946397698167856e-06, "rewards/volume_coverage_20": 0.001336311805061996, "rewards/volume_coverage_25": 0.007699974346905947, "rewards/volume_coverage_5": 2.720843217396407e-09, "signal/accuracy_reward/centered_abs_mean": 0.08271484375, "signal/accuracy_reward/group_std_mean": 0.10999367833137512, "signal/accuracy_reward/group_zero_std_frac": 0.68125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.041357421875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.041357421875, "signal/advantage_abs_mean": 0.06338016986846924, "signal/advantage_pre_scale_abs_mean": 0.06338016986846924, "signal/advantage_pre_scale_std": 0.10698268860578537, "signal/advantage_std": 0.10698268860578537, "signal/brier_reward/centered_abs_mean": 0.12253952324390412, "signal/brier_reward/group_std_mean": 0.15773009061813353, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012253952585160733, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012253952585160733, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.01234852969646454, "signal/confidence_uniqueness_reward/group_std_mean": 0.016739832423627376, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001234852964989841, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001234852964989841, "signal/format_reward/centered_abs_mean": 0.000933837890625, "signal/format_reward/group_std_mean": 0.0024258273653686045, "signal/format_reward/group_zero_std_frac": 0.9875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004669189453125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0004669189453125, "signal/frontier_aurc_reward/centered_abs_mean": 0.002989071374759078, "signal/frontier_aurc_reward/group_std_mean": 0.005077757174149156, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.736339276656508e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.736339276656508e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.006974827032536268, "signal/frontier_ece_reward/group_std_mean": 0.009086176566779614, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006974827032536268, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006974827032536268, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27312966585159304, "signal/frontier_entropy_batch_reward/group_std_mean": 0.34787346720695494, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02731296643614769, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02731296643614769, "signal/volume_coverage_0/centered_abs_mean": 5.004535186259318e-09, "signal/volume_coverage_0/group_std_mean": 6.371262450954873e-09, "signal/volume_coverage_0/group_zero_std_frac": 0.7875, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 5.004535175157087e-10, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 5.004535175157087e-10, "signal/volume_coverage_1/centered_abs_mean": 5.004535186259318e-09, "signal/volume_coverage_1/group_std_mean": 6.371262450954873e-09, "signal/volume_coverage_1/group_zero_std_frac": 0.7875, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 5.004535175157087e-10, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 5.004535175157087e-10, "signal/volume_coverage_10/centered_abs_mean": 1.767177568723355e-08, "signal/volume_coverage_10/group_std_mean": 2.240396206332207e-08, "signal/volume_coverage_10/group_zero_std_frac": 0.50625, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 1.76717753763711e-09, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 1.76717753763711e-09, "signal/volume_coverage_15/centered_abs_mean": 1.7186309264616284e-06, "signal/volume_coverage_15/group_std_mean": 2.15395040754629e-06, "signal/volume_coverage_15/group_zero_std_frac": 0.0, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 1.7186309761996199e-07, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 1.7186309761996199e-07, "signal/volume_coverage_20/centered_abs_mean": 0.002075748727656901, "signal/volume_coverage_20/group_std_mean": 0.002644157502800226, "signal/volume_coverage_20/group_zero_std_frac": 0.0, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 0.00020757487509399654, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 0.00020757487509399654, "signal/volume_coverage_25/centered_abs_mean": 0.01014525257050991, "signal/volume_coverage_25/group_std_mean": 0.012951592169702052, "signal/volume_coverage_25/group_zero_std_frac": 0.0, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.0010145252919755876, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 0.0010145252919755876, "signal/volume_coverage_5/centered_abs_mean": 5.004535186259318e-09, "signal/volume_coverage_5/group_std_mean": 6.371262450954873e-09, "signal/volume_coverage_5/group_zero_std_frac": 0.7875, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 5.004535175157087e-10, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 5.004535175157087e-10, "step": 310 }, { "calibration/aurc": 0.29983717719854924, "calibration/batch_distribution_entropy": 0.9600171414500873, "calibration/buffer_distribution_entropy": 0.9993445118220194, "calibration/confidence_entropy": 0.4847878699637469, "calibration/coverage@0%": 0.0126953125, "calibration/coverage@1%": 0.0126953125, "calibration/coverage@10%": 0.041015625, "calibration/coverage@15%": 0.060546875, "calibration/coverage@20%": 0.1005859375, "calibration/coverage@25%": 0.359375, "calibration/coverage@30%": 0.607421875, "calibration/coverage@5%": 0.03515625, "calibration/ece": 0.16615515511103873, "calibration/mean_confidence": 0.5929890637724151, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000244140625, "completions/max_length": 505.5, "completions/max_terminated_length": 505.5, "completions/mean_length": 218.71721649169922, "completions/mean_terminated_length": 218.77042388916016, "completions/min_length": 46.5, "completions/min_terminated_length": 87.5, "epoch": 0.9984, "num_tokens": 1050240623.0, "reward": 0.9275875687599182, "reward_std": 0.0807461366057396, "rewards/accuracy_reward": 0.55322265625, "rewards/brier_reward": 0.7675114274024963, "rewards/confidence_uniqueness_reward": 0.9522386491298676, "rewards/format_reward": 0.999755859375, "rewards/frontier_aurc_reward": -0.003080901689827442, "rewards/frontier_ece_reward": 0.002699983073398471, "rewards/frontier_entropy_batch_reward": -0.21747954189777374, "rewards/volume_coverage_0": 2.083941741393147e-09, "rewards/volume_coverage_1": 2.083941741393147e-09, "rewards/volume_coverage_10": 1.1432189417348582e-08, "rewards/volume_coverage_15": -8.116828098536644e-08, "rewards/volume_coverage_20": 0.0009809440525714308, "rewards/volume_coverage_25": 0.005417217034846544, "rewards/volume_coverage_5": 2.083941741393147e-09, "signal/accuracy_reward/centered_abs_mean": 0.075653076171875, "signal/accuracy_reward/group_std_mean": 0.10233127698302269, "signal/accuracy_reward/group_zero_std_frac": 0.6953125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0378265380859375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0378265380859375, "signal/advantage_abs_mean": 0.06188248656690121, "signal/advantage_pre_scale_abs_mean": 0.06188248656690121, "signal/advantage_pre_scale_std": 0.10488305985927582, "signal/advantage_std": 0.10488305985927582, "signal/brier_reward/centered_abs_mean": 0.12014567106962204, "signal/brier_reward/group_std_mean": 0.1542070060968399, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012014567852020264, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012014567852020264, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.011747919954359531, "signal/confidence_uniqueness_reward/group_std_mean": 0.015483672730624676, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0011747920652851462, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0011747920652851462, "signal/format_reward/centered_abs_mean": 0.0004730224609375, "signal/format_reward/group_std_mean": 0.0013810679083690047, "signal/format_reward/group_zero_std_frac": 0.9921875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00023651123046875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00023651123046875, "signal/frontier_aurc_reward/centered_abs_mean": 0.002737033413723111, "signal/frontier_aurc_reward/group_std_mean": 0.004487208207137883, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.4212920581921935e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.4212920581921935e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.006887981900945306, "signal/frontier_ece_reward/group_std_mean": 0.008894495666027069, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006887982017360628, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006887982017360628, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2844991385936737, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3585272878408432, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.028449914418160915, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.028449914418160915, "signal/volume_coverage_0/centered_abs_mean": 4.031929234926679e-09, "signal/volume_coverage_0/group_std_mean": 5.142488657128297e-09, "signal/volume_coverage_0/group_zero_std_frac": 0.8125, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 4.031929234926679e-10, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 4.031929234926679e-10, "signal/volume_coverage_1/centered_abs_mean": 4.031929234926679e-09, "signal/volume_coverage_1/group_std_mean": 5.142488657128297e-09, "signal/volume_coverage_1/group_zero_std_frac": 0.8125, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 4.031929234926679e-10, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 4.031929234926679e-10, "signal/volume_coverage_10/centered_abs_mean": 1.602587573756864e-08, "signal/volume_coverage_10/group_std_mean": 2.028727230651839e-08, "signal/volume_coverage_10/group_zero_std_frac": 0.5234375, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 1.602587618165785e-09, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 1.602587618165785e-09, "signal/volume_coverage_15/centered_abs_mean": 1.2426704643075936e-06, "signal/volume_coverage_15/group_std_mean": 1.5521810041718709e-06, "signal/volume_coverage_15/group_zero_std_frac": 0.015625, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 1.2426705353618672e-07, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 1.2426705353618672e-07, "signal/volume_coverage_20/centered_abs_mean": 0.0020069401944056153, "signal/volume_coverage_20/group_std_mean": 0.0025554284220561385, "signal/volume_coverage_20/group_zero_std_frac": 0.0, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 0.00020069401944056153, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 0.00020069401944056153, "signal/volume_coverage_25/centered_abs_mean": 0.009221313055604696, "signal/volume_coverage_25/group_std_mean": 0.011924784164875746, "signal/volume_coverage_25/group_zero_std_frac": 0.0, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.0009221313230227679, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 0.0009221313230227679, "signal/volume_coverage_5/centered_abs_mean": 4.031929234926679e-09, "signal/volume_coverage_5/group_std_mean": 5.142488657128297e-09, "signal/volume_coverage_5/group_zero_std_frac": 0.8125, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 4.031929234926679e-10, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 4.031929234926679e-10, "step": 312, "total_flos": 0.0, "train_loss": 1.2157592629172052e-05, "train_runtime": 59916.4251, "train_samples_per_second": 0.334, "train_steps_per_second": 0.005 } ], "logging_steps": 5, "max_steps": 312, "num_input_tokens_seen": 1050240623, "num_train_epochs": 1, "save_steps": 60, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }