{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9984, "eval_steps": 50, "global_step": 312, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "calibration/aurc": 0.6250458868239556, "calibration/batch_distribution_entropy": 0.6536619016238594, "calibration/confidence_entropy": 0.3506516141955464, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.48844069501450277, "calibration/mean_confidence": 0.7871118547273157, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03759765625, "completions/max_length": 1536.0, "completions/max_terminated_length": 1517.0, "completions/mean_length": 272.25546875, "completions/mean_terminated_length": 222.88035583496094, "completions/min_length": 2.0, "completions/min_terminated_length": 2.0, "epoch": 0.016, "grad_norm": 0.16206742823123932, "learning_rate": 3.1249999999999997e-07, "loss": 0.0938, "num_tokens": 17631928.0, "reward": 0.6381880164146423, "reward_std": 0.4766306221485138, "rewards/accuracy_reward": 0.22021484375, "rewards/brier_reward": 0.37847437858581545, "rewards/confidence_uniqueness_reward": 0.4914248585700989, "rewards/format_reward": 0.6865234375, "rewards/frontier_aurc_reward": 0.3039612889289856, "rewards/frontier_coverage_1": 0.3039612889289856, "rewards/frontier_coverage_10": 0.3039612889289856, "rewards/frontier_coverage_15": 0.3039612889289856, "rewards/frontier_coverage_20": 0.3039612889289856, "rewards/frontier_coverage_25": 0.3039612889289856, "rewards/frontier_coverage_5": 0.3039612889289856, "rewards/frontier_ece_reward": 0.3039612889289856, "signal/accuracy_reward/centered_abs_mean": 0.241912841796875, "signal/accuracy_reward/group_std_mean": 0.2817725300788879, "signal/accuracy_reward/group_zero_std_frac": 0.325, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1209564208984375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.1209564208984375, "signal/advantage_abs_mean": 0.40599689483642576, "signal/advantage_pre_scale_abs_mean": 0.40599689483642576, "signal/advantage_pre_scale_std": 0.48591635227203367, "signal/advantage_std": 0.48591635227203367, "signal/brier_reward/centered_abs_mean": 0.32122361063957217, "signal/brier_reward/group_std_mean": 0.36595953106880186, "signal/brier_reward/group_zero_std_frac": 0.003125, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04015295132994652, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.04015295132994652, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.2976445615291595, "signal/confidence_uniqueness_reward/group_std_mean": 0.3497284233570099, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03720557019114494, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.03720557019114494, "signal/format_reward/centered_abs_mean": 0.40189208984375, "signal/format_reward/group_std_mean": 0.4530576765537262, "signal/format_reward/group_zero_std_frac": 0.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.200946044921875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.200946044921875, "signal/frontier_aurc_reward/centered_abs_mean": 0.2956218898296356, "signal/frontier_aurc_reward/group_std_mean": 0.3452231645584106, "signal/frontier_aurc_reward/group_zero_std_frac": 0.003125, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.005291631631553173, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.005291631631553173, "signal/frontier_coverage_1/centered_abs_mean": 0.2956218898296356, "signal/frontier_coverage_1/group_std_mean": 0.3452231645584106, "signal/frontier_coverage_1/group_zero_std_frac": 0.003125, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005291631631553173, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005291631631553173, "signal/frontier_coverage_10/centered_abs_mean": 0.2956218898296356, "signal/frontier_coverage_10/group_std_mean": 0.3452231645584106, "signal/frontier_coverage_10/group_zero_std_frac": 0.003125, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005291631631553173, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005291631631553173, "signal/frontier_coverage_15/centered_abs_mean": 0.2956218898296356, "signal/frontier_coverage_15/group_std_mean": 0.3452231645584106, "signal/frontier_coverage_15/group_zero_std_frac": 0.003125, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005291631631553173, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005291631631553173, "signal/frontier_coverage_20/centered_abs_mean": 0.2956218898296356, "signal/frontier_coverage_20/group_std_mean": 0.3452231645584106, "signal/frontier_coverage_20/group_zero_std_frac": 0.003125, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005291631631553173, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005291631631553173, "signal/frontier_coverage_25/centered_abs_mean": 0.2956218898296356, "signal/frontier_coverage_25/group_std_mean": 0.3452231645584106, "signal/frontier_coverage_25/group_zero_std_frac": 0.003125, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.005291631631553173, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.005291631631553173, "signal/frontier_coverage_5/centered_abs_mean": 0.2956218898296356, "signal/frontier_coverage_5/group_std_mean": 0.3452231645584106, "signal/frontier_coverage_5/group_zero_std_frac": 0.003125, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005291631631553173, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005291631631553173, "signal/frontier_ece_reward/centered_abs_mean": 0.2956218898296356, "signal/frontier_ece_reward/group_std_mean": 0.3452231645584106, "signal/frontier_ece_reward/group_zero_std_frac": 0.003125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.03695273622870445, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.03695273622870445, "step": 5 }, { "calibration/aurc": 0.6812939989066674, "calibration/batch_distribution_entropy": 0.651988259109008, "calibration/confidence_entropy": 0.3465823907555327, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.5245675857124956, "calibration/mean_confidence": 0.7919402249592102, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0349609375, "completions/max_length": 1536.0, "completions/max_terminated_length": 1516.4, "completions/mean_length": 260.02294921875, "completions/mean_terminated_length": 213.81815490722656, "completions/min_length": 2.0, "completions/min_terminated_length": 2.0, "epoch": 0.032, "grad_norm": 0.052768442779779434, "learning_rate": 6.249999999999999e-07, "loss": 0.0933, "num_tokens": 35394915.0, "reward": 0.6551937222480774, "reward_std": 0.44631595611572267, "rewards/accuracy_reward": 0.21103515625, "rewards/brier_reward": 0.3817343056201935, "rewards/confidence_uniqueness_reward": 0.5185160636901855, "rewards/format_reward": 0.72353515625, "rewards/frontier_aurc_reward": 0.30114771127700807, "rewards/frontier_coverage_1": 0.30114771127700807, "rewards/frontier_coverage_10": 0.30114771127700807, "rewards/frontier_coverage_15": 0.30114771127700807, "rewards/frontier_coverage_20": 0.30114771127700807, "rewards/frontier_coverage_25": 0.30114771127700807, "rewards/frontier_coverage_5": 0.30114771127700807, "rewards/frontier_ece_reward": 0.30114771127700807, "signal/accuracy_reward/centered_abs_mean": 0.218841552734375, "signal/accuracy_reward/group_std_mean": 0.26480883955955503, "signal/accuracy_reward/group_zero_std_frac": 0.334375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1094207763671875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.1094207763671875, "signal/advantage_abs_mean": 0.36836239099502566, "signal/advantage_pre_scale_abs_mean": 0.36836239099502566, "signal/advantage_pre_scale_std": 0.45523207187652587, "signal/advantage_std": 0.45523207187652587, "signal/brier_reward/centered_abs_mean": 0.30253963470458983, "signal/brier_reward/group_std_mean": 0.35128949880599974, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03781745433807373, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.03781745433807373, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.2798932909965515, "signal/confidence_uniqueness_reward/group_std_mean": 0.3376554548740387, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03498666137456894, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.03498666137456894, "signal/format_reward/centered_abs_mean": 0.368145751953125, "signal/format_reward/group_std_mean": 0.4317262291908264, "signal/format_reward/group_zero_std_frac": 0.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.1840728759765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.1840728759765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.2753797650337219, "signal/frontier_aurc_reward/group_std_mean": 0.3300995469093323, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.004929297603666782, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.004929297603666782, "signal/frontier_coverage_1/centered_abs_mean": 0.2753797650337219, "signal/frontier_coverage_1/group_std_mean": 0.3300995469093323, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004929297603666782, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004929297603666782, "signal/frontier_coverage_10/centered_abs_mean": 0.2753797650337219, "signal/frontier_coverage_10/group_std_mean": 0.3300995469093323, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004929297603666782, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004929297603666782, "signal/frontier_coverage_15/centered_abs_mean": 0.2753797650337219, "signal/frontier_coverage_15/group_std_mean": 0.3300995469093323, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004929297603666782, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004929297603666782, "signal/frontier_coverage_20/centered_abs_mean": 0.2753797650337219, "signal/frontier_coverage_20/group_std_mean": 0.3300995469093323, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004929297603666782, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004929297603666782, "signal/frontier_coverage_25/centered_abs_mean": 0.2753797650337219, "signal/frontier_coverage_25/group_std_mean": 0.3300995469093323, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004929297603666782, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004929297603666782, "signal/frontier_coverage_5/centered_abs_mean": 0.2753797650337219, "signal/frontier_coverage_5/group_std_mean": 0.3300995469093323, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004929297603666782, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004929297603666782, "signal/frontier_ece_reward/centered_abs_mean": 0.2753797650337219, "signal/frontier_ece_reward/group_std_mean": 0.3300995469093323, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.03442247062921524, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.03442247062921524, "step": 10 }, { "calibration/aurc": 0.5846727034247318, "calibration/batch_distribution_entropy": 0.6331557206234091, "calibration/buffer_distribution_entropy": 0.6631927066094524, "calibration/confidence_entropy": 0.34227047283975975, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.47909140338947587, "calibration/mean_confidence": 0.8098621162720414, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01513671875, "completions/max_length": 1536.0, "completions/max_terminated_length": 1490.8, "completions/mean_length": 196.65634765625, "completions/mean_terminated_length": 176.16664123535156, "completions/min_length": 12.4, "completions/min_terminated_length": 12.4, "epoch": 0.048, "grad_norm": 0.03570015728473663, "learning_rate": 9.374999999999999e-07, "loss": 0.0544, "num_tokens": 52457412.0, "reward": 0.7937239408493042, "reward_std": 0.3412171393632889, "rewards/accuracy_reward": 0.26875, "rewards/brier_reward": 0.48464107513427734, "rewards/confidence_uniqueness_reward": 0.6457258105278015, "rewards/format_reward": 0.88681640625, "rewards/frontier_aurc_reward": 0.2983596006408334, "rewards/frontier_coverage_1": 0.3137420117855072, "rewards/frontier_coverage_10": 0.3137420117855072, "rewards/frontier_coverage_15": 0.3137420117855072, "rewards/frontier_coverage_20": 0.3137420117855072, "rewards/frontier_coverage_25": 0.3137420117855072, "rewards/frontier_coverage_5": 0.3137420117855072, "rewards/frontier_ece_reward": 0.2848668903112411, "signal/accuracy_reward/centered_abs_mean": 0.1978515625, "signal/accuracy_reward/group_std_mean": 0.24534497857093812, "signal/accuracy_reward/group_zero_std_frac": 0.359375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09892578125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09892578125, "signal/advantage_abs_mean": 0.25668698251247407, "signal/advantage_pre_scale_abs_mean": 0.25668698251247407, "signal/advantage_pre_scale_std": 0.35499538779258727, "signal/advantage_std": 0.35499538779258727, "signal/brier_reward/centered_abs_mean": 0.26803810596466066, "signal/brier_reward/group_std_mean": 0.3223264396190643, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03350476324558258, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.03350476324558258, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.19349431693553926, "signal/confidence_uniqueness_reward/group_std_mean": 0.25684032440185545, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.024186789616942407, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.024186789616942407, "signal/format_reward/centered_abs_mean": 0.186651611328125, "signal/format_reward/group_std_mean": 0.285383003950119, "signal/format_reward/group_zero_std_frac": 0.084375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0933258056640625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0933258056640625, "signal/frontier_aurc_reward/centered_abs_mean": 0.21558147557079793, "signal/frontier_aurc_reward/group_std_mean": 0.26004184521734713, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0038589084782870487, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0038589084782870487, "signal/frontier_coverage_1/centered_abs_mean": 0.23389289379119874, "signal/frontier_coverage_1/group_std_mean": 0.28874107003211974, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004186682868748903, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004186682868748903, "signal/frontier_coverage_10/centered_abs_mean": 0.23389289379119874, "signal/frontier_coverage_10/group_std_mean": 0.28874107003211974, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004186682868748903, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004186682868748903, "signal/frontier_coverage_15/centered_abs_mean": 0.23389289379119874, "signal/frontier_coverage_15/group_std_mean": 0.28874107003211974, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004186682868748903, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004186682868748903, "signal/frontier_coverage_20/centered_abs_mean": 0.23389289379119874, "signal/frontier_coverage_20/group_std_mean": 0.28874107003211974, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004186682868748903, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004186682868748903, "signal/frontier_coverage_25/centered_abs_mean": 0.23389289379119874, "signal/frontier_coverage_25/group_std_mean": 0.28874107003211974, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004186682868748903, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004186682868748903, "signal/frontier_coverage_5/centered_abs_mean": 0.23389289379119874, "signal/frontier_coverage_5/group_std_mean": 0.28874107003211974, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004186682868748903, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004186682868748903, "signal/frontier_ece_reward/centered_abs_mean": 0.23874643296003342, "signal/frontier_ece_reward/group_std_mean": 0.28933488130569457, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.029843304120004178, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.029843304120004178, "step": 15 }, { "calibration/aurc": 0.5276057163911909, "calibration/batch_distribution_entropy": 0.6998150529760003, "calibration/buffer_distribution_entropy": 0.6600571871120245, "calibration/confidence_entropy": 0.3813088817990363, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.36694634126724984, "calibration/mean_confidence": 0.7755121386863615, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.003515625, "completions/max_length": 1536.0, "completions/max_terminated_length": 1040.4, "completions/mean_length": 141.4296875, "completions/mean_terminated_length": 136.518115234375, "completions/min_length": 27.8, "completions/min_terminated_length": 27.8, "epoch": 0.064, "grad_norm": 0.01592865027487278, "learning_rate": 1e-06, "loss": 0.011, "num_tokens": 68824052.0, "reward": 0.8271668314933777, "reward_std": 0.21373497247695922, "rewards/accuracy_reward": 0.3455078125, "rewards/brier_reward": 0.5776345729827881, "rewards/confidence_uniqueness_reward": 0.7443219065666199, "rewards/format_reward": 0.97890625, "rewards/frontier_aurc_reward": -0.0066505827009677885, "rewards/frontier_coverage_1": 0.06369199305772781, "rewards/frontier_coverage_10": 0.06369199305772781, "rewards/frontier_coverage_15": 0.06369199305772781, "rewards/frontier_coverage_20": 0.06369199305772781, "rewards/frontier_coverage_25": 0.06369199305772781, "rewards/frontier_coverage_5": 0.06369199305772781, "rewards/frontier_ece_reward": -0.05605001614894718, "signal/accuracy_reward/centered_abs_mean": 0.20001220703125, "signal/accuracy_reward/group_std_mean": 0.2515187919139862, "signal/accuracy_reward/group_zero_std_frac": 0.3375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.100006103515625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.100006103515625, "signal/advantage_abs_mean": 0.16012502908706666, "signal/advantage_pre_scale_abs_mean": 0.16012502908706666, "signal/advantage_pre_scale_std": 0.2318114757537842, "signal/advantage_std": 0.2318114757537842, "signal/brier_reward/centered_abs_mean": 0.23968503773212432, "signal/brier_reward/group_std_mean": 0.2950502038002014, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02996062971651554, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.02996062971651554, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.1224755346775055, "signal/confidence_uniqueness_reward/group_std_mean": 0.15922289788722993, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.015309441834688187, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.015309441834688187, "signal/format_reward/centered_abs_mean": 0.03966064453125, "signal/format_reward/group_std_mean": 0.09460565596818923, "signal/format_reward/group_zero_std_frac": 0.540625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.019830322265625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.019830322265625, "signal/frontier_aurc_reward/centered_abs_mean": 0.004445481114089489, "signal/frontier_aurc_reward/group_std_mean": 0.0060803060419857505, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 7.957410998642445e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 7.957410998642445e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.10291247516870498, "signal/frontier_coverage_1/group_std_mean": 0.15976795852184295, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0018421332584694027, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0018421332584694027, "signal/frontier_coverage_10/centered_abs_mean": 0.10291247516870498, "signal/frontier_coverage_10/group_std_mean": 0.15976795852184295, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018421332584694027, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018421332584694027, "signal/frontier_coverage_15/centered_abs_mean": 0.10291247516870498, "signal/frontier_coverage_15/group_std_mean": 0.15976795852184295, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0018421332584694027, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018421332584694027, "signal/frontier_coverage_20/centered_abs_mean": 0.10291247516870498, "signal/frontier_coverage_20/group_std_mean": 0.15976795852184295, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0018421332584694027, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0018421332584694027, "signal/frontier_coverage_25/centered_abs_mean": 0.10291247516870498, "signal/frontier_coverage_25/group_std_mean": 0.15976795852184295, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0018421332584694027, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0018421332584694027, "signal/frontier_coverage_5/centered_abs_mean": 0.10291247516870498, "signal/frontier_coverage_5/group_std_mean": 0.15976795852184295, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018421332584694027, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018421332584694027, "signal/frontier_ece_reward/centered_abs_mean": 0.1306297332048416, "signal/frontier_ece_reward/group_std_mean": 0.16125866770744324, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0163287166506052, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0163287166506052, "step": 20 }, { "calibration/aurc": 0.6060577192384174, "calibration/batch_distribution_entropy": 0.7973205111041336, "calibration/buffer_distribution_entropy": 0.6869138037888193, "calibration/confidence_entropy": 0.44325387706996455, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.4005991584031368, "calibration/mean_confidence": 0.7169919419052851, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0005859375, "completions/max_length": 1300.0, "completions/max_terminated_length": 864.4, "completions/mean_length": 121.075390625, "completions/mean_terminated_length": 120.24693145751954, "completions/min_length": 23.4, "completions/min_terminated_length": 23.4, "epoch": 0.08, "grad_norm": 0.019067738205194473, "learning_rate": 1e-06, "loss": 0.0006, "num_tokens": 84997016.0, "reward": 0.8636497735977173, "reward_std": 0.1769395649433136, "rewards/accuracy_reward": 0.36455078125, "rewards/brier_reward": 0.6381376504898071, "rewards/confidence_uniqueness_reward": 0.8125494122505188, "rewards/format_reward": 0.99326171875, "rewards/frontier_aurc_reward": -0.005689960345625878, "rewards/frontier_coverage_1": 0.07561915218830109, "rewards/frontier_coverage_10": 0.07561915218830109, "rewards/frontier_coverage_15": 0.07561915218830109, "rewards/frontier_coverage_20": 0.07561915218830109, "rewards/frontier_coverage_25": 0.07561915218830109, "rewards/frontier_coverage_5": 0.07561915218830109, "rewards/frontier_ece_reward": -0.036895965412259105, "signal/accuracy_reward/centered_abs_mean": 0.188482666015625, "signal/accuracy_reward/group_std_mean": 0.2342788815498352, "signal/accuracy_reward/group_zero_std_frac": 0.38125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0942413330078125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0942413330078125, "signal/advantage_abs_mean": 0.13878562450408935, "signal/advantage_pre_scale_abs_mean": 0.13878562450408935, "signal/advantage_pre_scale_std": 0.19872219264507293, "signal/advantage_std": 0.19872219264507293, "signal/brier_reward/centered_abs_mean": 0.21654031574726104, "signal/brier_reward/group_std_mean": 0.26848899722099306, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02706753946840763, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.02706753946840763, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.074843430519104, "signal/confidence_uniqueness_reward/group_std_mean": 0.09928269833326339, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.009355428814888, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.009355428814888, "signal/format_reward/centered_abs_mean": 0.012176513671875, "signal/format_reward/group_std_mean": 0.029392263293266295, "signal/format_reward/group_zero_std_frac": 0.85, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0060882568359375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0060882568359375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0033756108488887547, "signal/frontier_aurc_reward/group_std_mean": 0.004790552891790867, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.042343229637481e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.042343229637481e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1333490714430809, "signal/frontier_coverage_1/group_std_mean": 0.1931760311126709, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023869482800364496, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023869482800364496, "signal/frontier_coverage_10/centered_abs_mean": 0.1333490714430809, "signal/frontier_coverage_10/group_std_mean": 0.1931760311126709, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023869482800364496, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023869482800364496, "signal/frontier_coverage_15/centered_abs_mean": 0.1333490714430809, "signal/frontier_coverage_15/group_std_mean": 0.1931760311126709, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023869482800364496, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023869482800364496, "signal/frontier_coverage_20/centered_abs_mean": 0.1333490714430809, "signal/frontier_coverage_20/group_std_mean": 0.1931760311126709, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0023869482800364496, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0023869482800364496, "signal/frontier_coverage_25/centered_abs_mean": 0.1333490714430809, "signal/frontier_coverage_25/group_std_mean": 0.1931760311126709, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0023869482800364496, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0023869482800364496, "signal/frontier_coverage_5/centered_abs_mean": 0.1333490714430809, "signal/frontier_coverage_5/group_std_mean": 0.1931760311126709, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023869482800364496, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023869482800364496, "signal/frontier_ece_reward/centered_abs_mean": 0.11479663252830505, "signal/frontier_ece_reward/group_std_mean": 0.13996243476867676, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.014349579066038131, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.014349579066038131, "step": 25 }, { "calibration/aurc": 0.607214118060659, "calibration/batch_distribution_entropy": 0.8750953970046617, "calibration/buffer_distribution_entropy": 0.7317237070035948, "calibration/confidence_entropy": 0.5109259837341227, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.02109375, "calibration/coverage@5%": 0.0, "calibration/ece": 0.30961384684766546, "calibration/mean_confidence": 0.6211195733784108, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00068359375, "completions/max_length": 1318.6, "completions/max_terminated_length": 528.2, "completions/mean_length": 122.15283203125, "completions/mean_terminated_length": 121.18623199462891, "completions/min_length": 33.0, "completions/min_terminated_length": 33.0, "epoch": 0.096, "grad_norm": 0.002318607410416007, "learning_rate": 1e-06, "loss": 0.0007, "num_tokens": 101292469.0, "reward": 0.8805407524108887, "reward_std": 0.15506583750247954, "rewards/accuracy_reward": 0.3703125, "rewards/brier_reward": 0.682115888595581, "rewards/confidence_uniqueness_reward": 0.8357307076454162, "rewards/format_reward": 0.99765625, "rewards/frontier_aurc_reward": -0.005162287503480911, "rewards/frontier_coverage_1": 0.09234738796949386, "rewards/frontier_coverage_10": 0.09234738796949386, "rewards/frontier_coverage_15": 0.09234738796949386, "rewards/frontier_coverage_20": 0.09234738796949386, "rewards/frontier_coverage_25": 0.09234738796949386, "rewards/frontier_coverage_5": 0.09234738796949386, "rewards/frontier_ece_reward": -0.02400104545522481, "signal/accuracy_reward/centered_abs_mean": 0.17286376953125, "signal/accuracy_reward/group_std_mean": 0.2227681815624237, "signal/accuracy_reward/group_zero_std_frac": 0.390625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.086431884765625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.086431884765625, "signal/advantage_abs_mean": 0.12113675624132156, "signal/advantage_pre_scale_abs_mean": 0.12113675624132156, "signal/advantage_pre_scale_std": 0.17278285920619965, "signal/advantage_std": 0.17278285920619965, "signal/brier_reward/centered_abs_mean": 0.1992181122303009, "signal/brier_reward/group_std_mean": 0.24767497479915618, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02490226402878761, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.02490226402878761, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.07850513458251954, "signal/confidence_uniqueness_reward/group_std_mean": 0.1010200709104538, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.009813141822814942, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.009813141822814942, "signal/format_reward/centered_abs_mean": 0.00452880859375, "signal/format_reward/group_std_mean": 0.012921943468973041, "signal/format_reward/group_zero_std_frac": 0.928125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.002264404296875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.002264404296875, "signal/frontier_aurc_reward/centered_abs_mean": 0.002785020461305976, "signal/frontier_aurc_reward/group_std_mean": 0.0042864244896918535, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.985186315025203e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.985186315025203e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.17054200172424316, "signal/frontier_coverage_1/group_std_mean": 0.23477787971496583, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0030527016613632442, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0030527016613632442, "signal/frontier_coverage_10/centered_abs_mean": 0.17054200172424316, "signal/frontier_coverage_10/group_std_mean": 0.23477787971496583, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0030527016613632442, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0030527016613632442, "signal/frontier_coverage_15/centered_abs_mean": 0.17054200172424316, "signal/frontier_coverage_15/group_std_mean": 0.23477787971496583, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0030527016613632442, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0030527016613632442, "signal/frontier_coverage_20/centered_abs_mean": 0.17054200172424316, "signal/frontier_coverage_20/group_std_mean": 0.23477787971496583, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0030527016613632442, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0030527016613632442, "signal/frontier_coverage_25/centered_abs_mean": 0.17054200172424316, "signal/frontier_coverage_25/group_std_mean": 0.23477787971496583, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0030527016613632442, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0030527016613632442, "signal/frontier_coverage_5/centered_abs_mean": 0.17054200172424316, "signal/frontier_coverage_5/group_std_mean": 0.23477787971496583, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0030527016613632442, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0030527016613632442, "signal/frontier_ece_reward/centered_abs_mean": 0.09855036437511444, "signal/frontier_ece_reward/group_std_mean": 0.11995106637477874, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.012318795546889305, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.012318795546889305, "step": 30 }, { "calibration/aurc": 0.4567628620102736, "calibration/batch_distribution_entropy": 0.9037929812843206, "calibration/buffer_distribution_entropy": 0.782850349677702, "calibration/confidence_entropy": 0.5351940352803781, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.051953125, "calibration/coverage@5%": 0.0, "calibration/ece": 0.158923750965474, "calibration/mean_confidence": 0.538056644981475, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 1076.8, "completions/max_terminated_length": 350.6, "completions/mean_length": 131.0107421875, "completions/mean_terminated_length": 130.59954223632812, "completions/min_length": 39.8, "completions/min_terminated_length": 39.8, "epoch": 0.112, "grad_norm": 0.0020931183826178312, "learning_rate": 1e-06, "loss": 0.0013, "num_tokens": 117743491.0, "reward": 0.9162874221801758, "reward_std": 0.13698765337467195, "rewards/accuracy_reward": 0.41279296875, "rewards/brier_reward": 0.7357259631156922, "rewards/confidence_uniqueness_reward": 0.8506728529930114, "rewards/format_reward": 0.9978515625, "rewards/frontier_aurc_reward": -0.0044169268570840355, "rewards/frontier_coverage_1": 0.11645138710737228, "rewards/frontier_coverage_10": 0.11645138710737228, "rewards/frontier_coverage_15": 0.11645138710737228, "rewards/frontier_coverage_20": 0.11645138710737228, "rewards/frontier_coverage_25": 0.11645138710737228, "rewards/frontier_coverage_5": 0.11645138710737228, "rewards/frontier_ece_reward": 0.0018998330924659967, "signal/accuracy_reward/centered_abs_mean": 0.166387939453125, "signal/accuracy_reward/group_std_mean": 0.21394164264202117, "signal/accuracy_reward/group_zero_std_frac": 0.4125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0831939697265625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0831939697265625, "signal/advantage_abs_mean": 0.10727472305297851, "signal/advantage_pre_scale_abs_mean": 0.10727472305297851, "signal/advantage_pre_scale_std": 0.15276951789855958, "signal/advantage_std": 0.15276951789855958, "signal/brier_reward/centered_abs_mean": 0.18316833972930907, "signal/brier_reward/group_std_mean": 0.2306816339492798, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.022896042466163634, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.022896042466163634, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.07995835840702056, "signal/confidence_uniqueness_reward/group_std_mean": 0.10394333600997925, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00999479480087757, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00999479480087757, "signal/format_reward/centered_abs_mean": 0.00416259765625, "signal/format_reward/group_std_mean": 0.0121533976867795, "signal/format_reward/group_zero_std_frac": 0.93125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.002081298828125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.002081298828125, "signal/frontier_aurc_reward/centered_abs_mean": 0.002050551865249872, "signal/frontier_aurc_reward/group_std_mean": 0.003289903746917844, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.6704877129523086e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.6704877129523086e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.22418826520442964, "signal/frontier_coverage_1/group_std_mean": 0.28972225487232206, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0040129697881639, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0040129697881639, "signal/frontier_coverage_10/centered_abs_mean": 0.22418826520442964, "signal/frontier_coverage_10/group_std_mean": 0.28972225487232206, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0040129697881639, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0040129697881639, "signal/frontier_coverage_15/centered_abs_mean": 0.22418826520442964, "signal/frontier_coverage_15/group_std_mean": 0.28972225487232206, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0040129697881639, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0040129697881639, "signal/frontier_coverage_20/centered_abs_mean": 0.22418826520442964, "signal/frontier_coverage_20/group_std_mean": 0.28972225487232206, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0040129697881639, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0040129697881639, "signal/frontier_coverage_25/centered_abs_mean": 0.22418826520442964, "signal/frontier_coverage_25/group_std_mean": 0.28972225487232206, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0040129697881639, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0040129697881639, "signal/frontier_coverage_5/centered_abs_mean": 0.22418826520442964, "signal/frontier_coverage_5/group_std_mean": 0.28972225487232206, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0040129697881639, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0040129697881639, "signal/frontier_ece_reward/centered_abs_mean": 0.07737888991832734, "signal/frontier_ece_reward/group_std_mean": 0.09668841660022735, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.009672361239790917, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.009672361239790917, "step": 35 }, { "calibration/aurc": 0.4731364410244561, "calibration/batch_distribution_entropy": 0.8993261190445182, "calibration/buffer_distribution_entropy": 0.8327576947575324, "calibration/confidence_entropy": 0.5259689998069328, "calibration/coverage@0%": 0.005091899773608073, "calibration/coverage@1%": 0.005091899773608073, "calibration/coverage@10%": 0.018033076244196308, "calibration/coverage@15%": 0.021954644871647288, "calibration/coverage@20%": 0.03956716933348682, "calibration/coverage@25%": 0.07555059523809524, "calibration/coverage@30%": 0.13774573656517403, "calibration/coverage@5%": 0.012150723303019837, "calibration/ece": 0.12738224827100658, "calibration/mean_confidence": 0.40403795922689695, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 847.6, "completions/max_terminated_length": 396.2, "completions/mean_length": 141.90908203125, "completions/mean_terminated_length": 141.50000915527343, "completions/min_length": 53.6, "completions/min_terminated_length": 53.6, "epoch": 0.128, "grad_norm": 0.001334143104031682, "learning_rate": 1e-06, "loss": 0.0005, "num_tokens": 134113312.0, "reward": 0.9246591687202453, "reward_std": 0.11305393874645234, "rewards/accuracy_reward": 0.40830078125, "rewards/brier_reward": 0.761095380783081, "rewards/confidence_uniqueness_reward": 0.8618135690689087, "rewards/format_reward": 0.99912109375, "rewards/frontier_aurc_reward": -0.0039564462844282385, "rewards/frontier_coverage_1": 0.1575838327407837, "rewards/frontier_coverage_10": 0.1575838327407837, "rewards/frontier_coverage_15": 0.1575838327407837, "rewards/frontier_coverage_20": 0.1575838327407837, "rewards/frontier_coverage_25": 0.1575838327407837, "rewards/frontier_coverage_5": 0.1575838327407837, "rewards/frontier_ece_reward": 0.009847322525456548, "signal/accuracy_reward/centered_abs_mean": 0.149713134765625, "signal/accuracy_reward/group_std_mean": 0.19379588663578035, "signal/accuracy_reward/group_zero_std_frac": 0.45625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0748565673828125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0748565673828125, "signal/advantage_abs_mean": 0.08884423375129699, "signal/advantage_pre_scale_abs_mean": 0.08884423375129699, "signal/advantage_pre_scale_std": 0.12785588651895524, "signal/advantage_std": 0.12785588651895524, "signal/brier_reward/centered_abs_mean": 0.1698179990053177, "signal/brier_reward/group_std_mean": 0.21664086878299713, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02122724987566471, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.02122724987566471, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.07381970435380936, "signal/confidence_uniqueness_reward/group_std_mean": 0.08946335166692734, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00922746304422617, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00922746304422617, "signal/format_reward/centered_abs_mean": 0.001702880859375, "signal/format_reward/group_std_mean": 0.0049718443769961596, "signal/format_reward/group_zero_std_frac": 0.971875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0008514404296875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0008514404296875, "signal/frontier_aurc_reward/centered_abs_mean": 0.0014080909080803395, "signal/frontier_aurc_reward/group_std_mean": 0.002259616693481803, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.520482667023316e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.520482667023316e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.2586131691932678, "signal/frontier_coverage_1/group_std_mean": 0.32670770287513734, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004629175364971161, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004629175364971161, "signal/frontier_coverage_10/centered_abs_mean": 0.2586131691932678, "signal/frontier_coverage_10/group_std_mean": 0.32670770287513734, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004629175364971161, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004629175364971161, "signal/frontier_coverage_15/centered_abs_mean": 0.2586131691932678, "signal/frontier_coverage_15/group_std_mean": 0.32670770287513734, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004629175364971161, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004629175364971161, "signal/frontier_coverage_20/centered_abs_mean": 0.2586131691932678, "signal/frontier_coverage_20/group_std_mean": 0.32670770287513734, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004629175364971161, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004629175364971161, "signal/frontier_coverage_25/centered_abs_mean": 0.2586131691932678, "signal/frontier_coverage_25/group_std_mean": 0.32670770287513734, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004629175364971161, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004629175364971161, "signal/frontier_coverage_5/centered_abs_mean": 0.2586131691932678, "signal/frontier_coverage_5/group_std_mean": 0.32670770287513734, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004629175364971161, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004629175364971161, "signal/frontier_ece_reward/centered_abs_mean": 0.0536692775785923, "signal/frontier_ece_reward/group_std_mean": 0.07274282872676849, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0067086596973240376, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0067086596973240376, "step": 40 }, { "calibration/aurc": 0.3318122292710428, "calibration/batch_distribution_entropy": 0.9011091838112341, "calibration/buffer_distribution_entropy": 0.8770609177707678, "calibration/confidence_entropy": 0.47220691985763424, "calibration/coverage@0%": 0.010561399217221134, "calibration/coverage@1%": 0.010561399217221134, "calibration/coverage@10%": 0.04851547211350293, "calibration/coverage@15%": 0.10991163160469668, "calibration/coverage@20%": 0.16115994985322896, "calibration/coverage@25%": 0.2792380136986301, "calibration/coverage@30%": 0.40509876467710376, "calibration/coverage@5%": 0.0356103228962818, "calibration/ece": 0.2681012210279697, "calibration/mean_confidence": 0.34697361324306747, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00048828125, "completions/max_length": 1307.6, "completions/max_terminated_length": 507.2, "completions/mean_length": 148.73115234375, "completions/mean_terminated_length": 148.05319519042968, "completions/min_length": 53.8, "completions/min_terminated_length": 53.8, "epoch": 0.144, "grad_norm": 0.0013268872862681746, "learning_rate": 1e-06, "loss": 0.0014, "num_tokens": 150586751.0, "reward": 0.9608087778091431, "reward_std": 0.10770493298768997, "rewards/accuracy_reward": 0.5029296875, "rewards/brier_reward": 0.7304662704467774, "rewards/confidence_uniqueness_reward": 0.8599502563476562, "rewards/format_reward": 0.9990234375, "rewards/frontier_aurc_reward": -0.003411487862467766, "rewards/frontier_coverage_1": 0.08409715853631497, "rewards/frontier_coverage_10": 0.08409715853631497, "rewards/frontier_coverage_15": 0.08409715853631497, "rewards/frontier_coverage_20": 0.08409715853631497, "rewards/frontier_coverage_25": 0.08409715853631497, "rewards/frontier_coverage_5": 0.08409715853631497, "rewards/frontier_ece_reward": 0.016473467275500298, "signal/accuracy_reward/centered_abs_mean": 0.1483642578125, "signal/accuracy_reward/group_std_mean": 0.19843538403511046, "signal/accuracy_reward/group_zero_std_frac": 0.41875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07418212890625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07418212890625, "signal/advantage_abs_mean": 0.08254445642232895, "signal/advantage_pre_scale_abs_mean": 0.08254445642232895, "signal/advantage_pre_scale_std": 0.11901939809322357, "signal/advantage_std": 0.11901939809322357, "signal/brier_reward/centered_abs_mean": 0.1825536698102951, "signal/brier_reward/group_std_mean": 0.22881582379341125, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02281920872628689, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.02281920872628689, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.07770240604877472, "signal/confidence_uniqueness_reward/group_std_mean": 0.09924467504024506, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00971280075609684, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00971280075609684, "signal/format_reward/centered_abs_mean": 0.00189208984375, "signal/format_reward/group_std_mean": 0.005524271540343762, "signal/format_reward/group_zero_std_frac": 0.96875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000946044921875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000946044921875, "signal/frontier_aurc_reward/centered_abs_mean": 0.0013431656872853637, "signal/frontier_aurc_reward/group_std_mean": 0.0021427671890705824, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.4042664517764933e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.4042664517764933e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.28589142560958863, "signal/frontier_coverage_1/group_std_mean": 0.356065034866333, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005117456335574389, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005117456335574389, "signal/frontier_coverage_10/centered_abs_mean": 0.28589142560958863, "signal/frontier_coverage_10/group_std_mean": 0.356065034866333, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005117456335574389, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005117456335574389, "signal/frontier_coverage_15/centered_abs_mean": 0.28589142560958863, "signal/frontier_coverage_15/group_std_mean": 0.356065034866333, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005117456335574389, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005117456335574389, "signal/frontier_coverage_20/centered_abs_mean": 0.28589142560958863, "signal/frontier_coverage_20/group_std_mean": 0.356065034866333, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005117456335574389, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005117456335574389, "signal/frontier_coverage_25/centered_abs_mean": 0.28589142560958863, "signal/frontier_coverage_25/group_std_mean": 0.356065034866333, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.005117456335574389, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.005117456335574389, "signal/frontier_coverage_5/centered_abs_mean": 0.28589142560958863, "signal/frontier_coverage_5/group_std_mean": 0.356065034866333, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005117456335574389, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005117456335574389, "signal/frontier_ece_reward/centered_abs_mean": 0.04525191038846969, "signal/frontier_ece_reward/group_std_mean": 0.06264355853199959, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005656488798558712, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005656488798558712, "step": 45 }, { "calibration/aurc": 0.38946164222395224, "calibration/batch_distribution_entropy": 0.8997395386342186, "calibration/buffer_distribution_entropy": 0.9080234560993914, "calibration/confidence_entropy": 0.45810094180544186, "calibration/coverage@0%": 0.0023483365949119373, "calibration/coverage@1%": 0.0023483365949119373, "calibration/coverage@10%": 0.008610567514677103, "calibration/coverage@15%": 0.012133072407045009, "calibration/coverage@20%": 0.07978391442193315, "calibration/coverage@25%": 0.13847784105080388, "calibration/coverage@30%": 0.24525552130098616, "calibration/coverage@5%": 0.0023483365949119373, "calibration/ece": 0.17463787616505108, "calibration/mean_confidence": 0.33009617318790924, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000390625, "completions/max_length": 1233.4, "completions/max_terminated_length": 568.2, "completions/mean_length": 154.51337890625, "completions/mean_terminated_length": 153.9742462158203, "completions/min_length": 64.8, "completions/min_terminated_length": 64.8, "epoch": 0.16, "grad_norm": 0.0014597562840208411, "learning_rate": 1e-06, "loss": 0.0009, "num_tokens": 167189896.0, "reward": 0.9457063794136047, "reward_std": 0.10457922667264938, "rewards/accuracy_reward": 0.45458984375, "rewards/brier_reward": 0.7449754357337952, "rewards/confidence_uniqueness_reward": 0.8708751559257507, "rewards/format_reward": 0.99921875, "rewards/frontier_aurc_reward": -0.003416177164763212, "rewards/frontier_coverage_1": 0.1375892072916031, "rewards/frontier_coverage_10": 0.1375892072916031, "rewards/frontier_coverage_15": 0.1375892072916031, "rewards/frontier_coverage_20": 0.1375892072916031, "rewards/frontier_coverage_25": 0.1375892072916031, "rewards/frontier_coverage_5": 0.1375892072916031, "rewards/frontier_ece_reward": 0.0168386897072196, "signal/accuracy_reward/centered_abs_mean": 0.139410400390625, "signal/accuracy_reward/group_std_mean": 0.18234478533267975, "signal/accuracy_reward/group_zero_std_frac": 0.490625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0697052001953125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0697052001953125, "signal/advantage_abs_mean": 0.08101131021976471, "signal/advantage_pre_scale_abs_mean": 0.08101131021976471, "signal/advantage_pre_scale_std": 0.11804848611354828, "signal/advantage_std": 0.11804848611354828, "signal/brier_reward/centered_abs_mean": 0.18030621111392975, "signal/brier_reward/group_std_mean": 0.22819359302520753, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02253827638924122, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.02253827638924122, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.06919719129800797, "signal/confidence_uniqueness_reward/group_std_mean": 0.08767969161272049, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.008649648912250996, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008649648912250996, "signal/format_reward/centered_abs_mean": 0.001513671875, "signal/format_reward/group_std_mean": 0.004419417306780815, "signal/format_reward/group_zero_std_frac": 0.975, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0007568359375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0007568359375, "signal/frontier_aurc_reward/centered_abs_mean": 0.001562736975029111, "signal/frontier_aurc_reward/group_std_mean": 0.002508711442351341, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.7972989846603015e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.7972989846603015e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.2776048481464386, "signal/frontier_coverage_1/group_std_mean": 0.34865164160728457, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004969126544892788, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004969126544892788, "signal/frontier_coverage_10/centered_abs_mean": 0.2776048481464386, "signal/frontier_coverage_10/group_std_mean": 0.34865164160728457, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004969126544892788, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004969126544892788, "signal/frontier_coverage_15/centered_abs_mean": 0.2776048481464386, "signal/frontier_coverage_15/group_std_mean": 0.34865164160728457, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004969126544892788, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004969126544892788, "signal/frontier_coverage_20/centered_abs_mean": 0.2776048481464386, "signal/frontier_coverage_20/group_std_mean": 0.34865164160728457, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004969126544892788, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004969126544892788, "signal/frontier_coverage_25/centered_abs_mean": 0.2776048481464386, "signal/frontier_coverage_25/group_std_mean": 0.34865164160728457, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004969126544892788, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004969126544892788, "signal/frontier_coverage_5/centered_abs_mean": 0.2776048481464386, "signal/frontier_coverage_5/group_std_mean": 0.34865164160728457, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004969126544892788, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004969126544892788, "signal/frontier_ece_reward/centered_abs_mean": 0.046651491522789, "signal/frontier_ece_reward/group_std_mean": 0.06448797807097435, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005831436440348625, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005831436440348625, "step": 50 }, { "epoch": 0.16, "eval_calibration/aurc": 0.6191944124446507, "eval_calibration/batch_distribution_entropy": 0.909546140975906, "eval_calibration/buffer_distribution_entropy": 0.9216257279279545, "eval_calibration/confidence_entropy": 0.43642560073917536, "eval_calibration/coverage@0%": 0.0078125, "eval_calibration/coverage@1%": 0.0078125, "eval_calibration/coverage@10%": 0.0078125, "eval_calibration/coverage@15%": 0.0078125, "eval_calibration/coverage@20%": 0.0078125, "eval_calibration/coverage@25%": 0.0078125, "eval_calibration/coverage@30%": 0.0078125, "eval_calibration/coverage@5%": 0.0078125, "eval_calibration/ece": 0.2444125504032258, "eval_calibration/mean_confidence": 0.3953807963709677, "eval_completions/clipped_ratio": 0.002155172413793094, "eval_completions/max_length": 594.25, "eval_completions/max_terminated_length": 312.0, "eval_completions/mean_length": 162.88725662231445, "eval_completions/mean_terminated_length": 159.92407989501953, "eval_completions/min_length": 74.5, "eval_completions/min_terminated_length": 74.5, "eval_loss": 0.0, "eval_num_tokens": 167189896.0, "eval_reward": 0.9074415266513824, "eval_reward_std": 0.20979441329836845, "eval_rewards/accuracy_reward": 0.369140625, "eval_rewards/brier_reward": 0.7619887739419937, "eval_rewards/confidence_uniqueness_reward": 0.846884474158287, "eval_rewards/format_reward": 0.99609375, "eval_rewards/frontier_aurc_reward": -0.0038871413562446833, "eval_rewards/frontier_coverage_1": 0.20465973764657974, "eval_rewards/frontier_coverage_10": 0.20465973764657974, "eval_rewards/frontier_coverage_15": 0.20465973764657974, "eval_rewards/frontier_coverage_20": 0.20465973764657974, "eval_rewards/frontier_coverage_25": 0.20465973764657974, "eval_rewards/frontier_coverage_5": 0.20465973764657974, "eval_rewards/frontier_ece_reward": 0.014434305019676685, "eval_runtime": 26.2542, "eval_samples_per_second": 19.045, "eval_signal/accuracy_reward/centered_abs_mean": 0.4490966796875, "eval_signal/accuracy_reward/group_std_mean": 0.4801955074071884, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.22454833984375, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.22454833984375, "eval_signal/advantage_abs_mean": 0.18047761172056198, "eval_signal/advantage_pre_scale_abs_mean": 0.18047761172056198, "eval_signal/advantage_pre_scale_std": 0.20848042145371437, "eval_signal/advantage_std": 0.20848042145371437, "eval_signal/brier_reward/centered_abs_mean": 0.22210051491856575, "eval_signal/brier_reward/group_std_mean": 0.27025653421878815, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02776256436482072, "eval_signal/brier_reward/weight": 0.125, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.02776256436482072, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.07186052948236465, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.09253636561334133, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.008982566185295582, "eval_signal/confidence_uniqueness_reward/weight": 0.125, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008982566185295582, "eval_signal/format_reward/centered_abs_mean": 0.007568359375, "eval_signal/format_reward/group_std_mean": 0.022097086533904076, "eval_signal/format_reward/group_zero_std_frac": 0.875, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0037841796875, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0037841796875, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.002644163556396961, "eval_signal/frontier_aurc_reward/group_std_mean": 0.004254971107002348, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.7330525376310106e-05, "eval_signal/frontier_aurc_reward/weight": 0.017899999395012856, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.7330525376310106e-05, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.42421814799308777, "eval_signal/frontier_coverage_1/group_std_mean": 0.5166834145784378, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.007593504618853331, "eval_signal/frontier_coverage_1/weight": 0.017899999395012856, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.007593504618853331, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.42421814799308777, "eval_signal/frontier_coverage_10/group_std_mean": 0.5166834145784378, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.007593504618853331, "eval_signal/frontier_coverage_10/weight": 0.017899999395012856, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.007593504618853331, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.42421814799308777, "eval_signal/frontier_coverage_15/group_std_mean": 0.5166834145784378, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.007593504618853331, "eval_signal/frontier_coverage_15/weight": 0.017899999395012856, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.007593504618853331, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.42421814799308777, "eval_signal/frontier_coverage_20/group_std_mean": 0.5166834145784378, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.007593504618853331, "eval_signal/frontier_coverage_20/weight": 0.017899999395012856, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.007593504618853331, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.42421814799308777, "eval_signal/frontier_coverage_25/group_std_mean": 0.5166834145784378, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.007593504618853331, "eval_signal/frontier_coverage_25/weight": 0.017899999395012856, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.007593504618853331, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.42421814799308777, "eval_signal/frontier_coverage_5/group_std_mean": 0.5166834145784378, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.007593504618853331, "eval_signal/frontier_coverage_5/weight": 0.017899999395012856, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.007593504618853331, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.05587606783956289, "eval_signal/frontier_ece_reward/group_std_mean": 0.08752950467169285, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006984508479945362, "eval_signal/frontier_ece_reward/weight": 0.125, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006984508479945362, "eval_steps_per_second": 0.152, "step": 50 }, { "calibration/aurc": 0.40226900078093014, "calibration/batch_distribution_entropy": 0.9552943382208323, "calibration/buffer_distribution_entropy": 0.9272863852228177, "calibration/confidence_entropy": 0.4636731554061452, "calibration/coverage@0%": 0.00234375, "calibration/coverage@1%": 0.00234375, "calibration/coverage@10%": 0.00234375, "calibration/coverage@15%": 0.003125, "calibration/coverage@20%": 0.015234375, "calibration/coverage@25%": 0.069921875, "calibration/coverage@30%": 0.260546875, "calibration/coverage@5%": 0.00234375, "calibration/ece": 0.2050855796754508, "calibration/mean_confidence": 0.4194716920787247, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 669.2, "completions/max_terminated_length": 446.2, "completions/mean_length": 159.76171875, "completions/mean_terminated_length": 159.6274383544922, "completions/min_length": 69.8, "completions/min_terminated_length": 69.8, "epoch": 0.176, "grad_norm": 0.0011799404164776206, "learning_rate": 1e-06, "loss": 0.0007, "num_tokens": 184062976.0, "reward": 0.9487934350967407, "reward_std": 0.10535514205694199, "rewards/accuracy_reward": 0.451953125, "rewards/brier_reward": 0.7491334080696106, "rewards/confidence_uniqueness_reward": 0.8902879357337952, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.003473851131275296, "rewards/frontier_coverage_1": 0.1475163072347641, "rewards/frontier_coverage_10": 0.1475163072347641, "rewards/frontier_coverage_15": 0.1475163072347641, "rewards/frontier_coverage_20": 0.1475163072347641, "rewards/frontier_coverage_25": 0.1475163072347641, "rewards/frontier_coverage_5": 0.1475163072347641, "rewards/frontier_ece_reward": 0.018036763183772564, "signal/accuracy_reward/centered_abs_mean": 0.14241943359375, "signal/accuracy_reward/group_std_mean": 0.18591534495353698, "signal/accuracy_reward/group_zero_std_frac": 0.48125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.071209716796875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.071209716796875, "signal/advantage_abs_mean": 0.0813161700963974, "signal/advantage_pre_scale_abs_mean": 0.0813161700963974, "signal/advantage_pre_scale_std": 0.11816369593143464, "signal/advantage_std": 0.11816369593143464, "signal/brier_reward/centered_abs_mean": 0.18312384486198424, "signal/brier_reward/group_std_mean": 0.2311247318983078, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02289048060774803, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.02289048060774803, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.05273754522204399, "signal/confidence_uniqueness_reward/group_std_mean": 0.0655559055507183, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006592193152755499, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006592193152755499, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.0020946973469108342, "signal/frontier_aurc_reward/group_std_mean": 0.0033506324514746668, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.749508032342419e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.749508032342419e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.27024593353271487, "signal/frontier_coverage_1/group_std_mean": 0.3390821158885956, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004837402049452067, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004837402049452067, "signal/frontier_coverage_10/centered_abs_mean": 0.27024593353271487, "signal/frontier_coverage_10/group_std_mean": 0.3390821158885956, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004837402049452067, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004837402049452067, "signal/frontier_coverage_15/centered_abs_mean": 0.27024593353271487, "signal/frontier_coverage_15/group_std_mean": 0.3390821158885956, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004837402049452067, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004837402049452067, "signal/frontier_coverage_20/centered_abs_mean": 0.27024593353271487, "signal/frontier_coverage_20/group_std_mean": 0.3390821158885956, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004837402049452067, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004837402049452067, "signal/frontier_coverage_25/centered_abs_mean": 0.27024593353271487, "signal/frontier_coverage_25/group_std_mean": 0.3390821158885956, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004837402049452067, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004837402049452067, "signal/frontier_coverage_5/centered_abs_mean": 0.27024593353271487, "signal/frontier_coverage_5/group_std_mean": 0.3390821158885956, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004837402049452067, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004837402049452067, "signal/frontier_ece_reward/centered_abs_mean": 0.05012344047427177, "signal/frontier_ece_reward/group_std_mean": 0.06734204888343812, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006265430059283972, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006265430059283972, "step": 55 }, { "calibration/aurc": 0.34397713585565626, "calibration/batch_distribution_entropy": 0.9688834897018976, "calibration/buffer_distribution_entropy": 0.9367661297882097, "calibration/confidence_entropy": 0.45534865455503254, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.00625, "calibration/coverage@15%": 0.037890625, "calibration/coverage@20%": 0.123828125, "calibration/coverage@25%": 0.208984375, "calibration/coverage@30%": 0.38515625, "calibration/coverage@5%": 0.0, "calibration/ece": 0.12179229650095984, "calibration/mean_confidence": 0.47636667340644767, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 658.8, "completions/max_terminated_length": 420.6, "completions/mean_length": 163.17666015625, "completions/mean_terminated_length": 163.04219665527344, "completions/min_length": 72.8, "completions/min_terminated_length": 72.8, "epoch": 0.192, "grad_norm": 0.0012748718727380037, "learning_rate": 1e-06, "loss": 0.0003, "num_tokens": 200548721.0, "reward": 0.9616720676422119, "reward_std": 0.11071353554725646, "rewards/accuracy_reward": 0.4712890625, "rewards/brier_reward": 0.7608612418174744, "rewards/confidence_uniqueness_reward": 0.9034651756286621, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.003351506870239973, "rewards/frontier_coverage_1": 0.13997417837381362, "rewards/frontier_coverage_10": 0.13997417837381362, "rewards/frontier_coverage_15": 0.13997417837381362, "rewards/frontier_coverage_20": 0.13997417837381362, "rewards/frontier_coverage_25": 0.13997417837381362, "rewards/frontier_coverage_5": 0.13997417837381362, "rewards/frontier_ece_reward": 0.025279919058084487, "signal/accuracy_reward/centered_abs_mean": 0.13929443359375, "signal/accuracy_reward/group_std_mean": 0.18363580107688904, "signal/accuracy_reward/group_zero_std_frac": 0.475, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.069647216796875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.069647216796875, "signal/advantage_abs_mean": 0.08572653234004975, "signal/advantage_pre_scale_abs_mean": 0.08572653234004975, "signal/advantage_pre_scale_std": 0.1261049687862396, "signal/advantage_std": 0.1261049687862396, "signal/brier_reward/centered_abs_mean": 0.18710338175296784, "signal/brier_reward/group_std_mean": 0.23497919142246246, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02338792271912098, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.02338792271912098, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04473799243569374, "signal/confidence_uniqueness_reward/group_std_mean": 0.05513100624084473, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005592249054461718, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005592249054461718, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.0025330715347081424, "signal/frontier_aurc_reward/group_std_mean": 0.0038898529950529338, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.534197796601802e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.534197796601802e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.25085292756557465, "signal/frontier_coverage_1/group_std_mean": 0.318107670545578, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0044902671128511425, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0044902671128511425, "signal/frontier_coverage_10/centered_abs_mean": 0.25085292756557465, "signal/frontier_coverage_10/group_std_mean": 0.318107670545578, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0044902671128511425, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0044902671128511425, "signal/frontier_coverage_15/centered_abs_mean": 0.25085292756557465, "signal/frontier_coverage_15/group_std_mean": 0.318107670545578, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0044902671128511425, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0044902671128511425, "signal/frontier_coverage_20/centered_abs_mean": 0.25085292756557465, "signal/frontier_coverage_20/group_std_mean": 0.318107670545578, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0044902671128511425, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0044902671128511425, "signal/frontier_coverage_25/centered_abs_mean": 0.25085292756557465, "signal/frontier_coverage_25/group_std_mean": 0.318107670545578, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0044902671128511425, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0044902671128511425, "signal/frontier_coverage_5/centered_abs_mean": 0.25085292756557465, "signal/frontier_coverage_5/group_std_mean": 0.318107670545578, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0044902671128511425, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0044902671128511425, "signal/frontier_ece_reward/centered_abs_mean": 0.057060886174440384, "signal/frontier_ece_reward/group_std_mean": 0.0741629496216774, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.007132610771805048, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.007132610771805048, "step": 60 }, { "calibration/aurc": 0.3186970268959458, "calibration/batch_distribution_entropy": 0.9612344061340921, "calibration/buffer_distribution_entropy": 0.942231301780966, "calibration/confidence_entropy": 0.44323483168206046, "calibration/coverage@0%": 0.003515625, "calibration/coverage@1%": 0.003515625, "calibration/coverage@10%": 0.07578125, "calibration/coverage@15%": 0.178125, "calibration/coverage@20%": 0.316015625, "calibration/coverage@25%": 0.45234375, "calibration/coverage@30%": 0.526171875, "calibration/coverage@5%": 0.003515625, "calibration/ece": 0.16880598958333337, "calibration/mean_confidence": 0.5302666666666667, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 635.8, "completions/max_terminated_length": 635.8, "completions/mean_length": 167.496875, "completions/mean_terminated_length": 167.496875, "completions/min_length": 72.0, "completions/min_terminated_length": 72.0, "epoch": 0.208, "grad_norm": 0.0012696352787315845, "learning_rate": 1e-06, "loss": 0.0005, "num_tokens": 217296113.0, "reward": 0.9835787534713745, "reward_std": 0.115215602517128, "rewards/accuracy_reward": 0.51943359375, "rewards/brier_reward": 0.7641871452331543, "rewards/confidence_uniqueness_reward": 0.9048604130744934, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.002962963841855526, "rewards/frontier_coverage_1": 0.10569853037595749, "rewards/frontier_coverage_10": 0.10569853037595749, "rewards/frontier_coverage_15": 0.10569853037595749, "rewards/frontier_coverage_20": 0.10569853037595749, "rewards/frontier_coverage_25": 0.10569853037595749, "rewards/frontier_coverage_5": 0.10569853037595749, "rewards/frontier_ece_reward": 0.032237300649285316, "signal/accuracy_reward/centered_abs_mean": 0.139007568359375, "signal/accuracy_reward/group_std_mean": 0.18477267920970916, "signal/accuracy_reward/group_zero_std_frac": 0.475, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0695037841796875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0695037841796875, "signal/advantage_abs_mean": 0.08863357156515121, "signal/advantage_pre_scale_abs_mean": 0.08863357156515121, "signal/advantage_pre_scale_std": 0.13259580731391907, "signal/advantage_std": 0.13259580731391907, "signal/brier_reward/centered_abs_mean": 0.18662929832935332, "signal/brier_reward/group_std_mean": 0.23361400365829468, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.023328662291169165, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.023328662291169165, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.05014804154634476, "signal/confidence_uniqueness_reward/group_std_mean": 0.060537828505039214, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006268505193293095, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006268505193293095, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0027079980354756117, "signal/frontier_aurc_reward/group_std_mean": 0.004054524842649699, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.8473164497409016e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.8473164497409016e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.22929660975933075, "signal/frontier_coverage_1/group_std_mean": 0.29457331299781797, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004104409227147698, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004104409227147698, "signal/frontier_coverage_10/centered_abs_mean": 0.22929660975933075, "signal/frontier_coverage_10/group_std_mean": 0.29457331299781797, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004104409227147698, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004104409227147698, "signal/frontier_coverage_15/centered_abs_mean": 0.22929660975933075, "signal/frontier_coverage_15/group_std_mean": 0.29457331299781797, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004104409227147698, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004104409227147698, "signal/frontier_coverage_20/centered_abs_mean": 0.22929660975933075, "signal/frontier_coverage_20/group_std_mean": 0.29457331299781797, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004104409227147698, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004104409227147698, "signal/frontier_coverage_25/centered_abs_mean": 0.22929660975933075, "signal/frontier_coverage_25/group_std_mean": 0.29457331299781797, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004104409227147698, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004104409227147698, "signal/frontier_coverage_5/centered_abs_mean": 0.22929660975933075, "signal/frontier_coverage_5/group_std_mean": 0.29457331299781797, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004104409227147698, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004104409227147698, "signal/frontier_ece_reward/centered_abs_mean": 0.05887450873851776, "signal/frontier_ece_reward/group_std_mean": 0.07482730895280838, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00735931359231472, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00735931359231472, "step": 65 }, { "calibration/aurc": 0.3285903790301356, "calibration/batch_distribution_entropy": 0.9653098937115295, "calibration/buffer_distribution_entropy": 0.9474725315085728, "calibration/confidence_entropy": 0.4189063430289507, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.08909313725490195, "calibration/coverage@15%": 0.17904105392156863, "calibration/coverage@20%": 0.27637254901960784, "calibration/coverage@25%": 0.4222089460784314, "calibration/coverage@30%": 0.5321078431372549, "calibration/coverage@5%": 0.01604626225490196, "calibration/ece": 0.16827429222736107, "calibration/mean_confidence": 0.4845619210079331, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000390625, "completions/max_length": 1140.4, "completions/max_terminated_length": 697.4, "completions/mean_length": 167.4361328125, "completions/mean_terminated_length": 166.9017761230469, "completions/min_length": 64.4, "completions/min_terminated_length": 64.4, "epoch": 0.224, "grad_norm": 0.0010618689702823758, "learning_rate": 1e-06, "loss": 0.0011, "num_tokens": 234163843.0, "reward": 0.9629539489746094, "reward_std": 0.11185683757066726, "rewards/accuracy_reward": 0.468359375, "rewards/brier_reward": 0.7660558819770813, "rewards/confidence_uniqueness_reward": 0.9080687403678894, "rewards/format_reward": 0.99951171875, "rewards/frontier_aurc_reward": -0.0033203907776623966, "rewards/frontier_coverage_1": 0.1524452567100525, "rewards/frontier_coverage_10": 0.1524452567100525, "rewards/frontier_coverage_15": 0.1524452567100525, "rewards/frontier_coverage_20": 0.1524452567100525, "rewards/frontier_coverage_25": 0.1524452567100525, "rewards/frontier_coverage_5": 0.1524452567100525, "rewards/frontier_ece_reward": 0.027517157793045043, "signal/accuracy_reward/centered_abs_mean": 0.1264404296875, "signal/accuracy_reward/group_std_mean": 0.17330618500709533, "signal/accuracy_reward/group_zero_std_frac": 0.478125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06322021484375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06322021484375, "signal/advantage_abs_mean": 0.08456142097711564, "signal/advantage_pre_scale_abs_mean": 0.08456142097711564, "signal/advantage_pre_scale_std": 0.12961900383234023, "signal/advantage_std": 0.12961900383234023, "signal/brier_reward/centered_abs_mean": 0.1864775687456131, "signal/brier_reward/group_std_mean": 0.23539321422576903, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02330969609320164, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.02330969609320164, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.052434886991977694, "signal/confidence_uniqueness_reward/group_std_mean": 0.06445125937461853, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006554360873997212, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006554360873997212, "signal/format_reward/centered_abs_mean": 0.000946044921875, "signal/format_reward/group_std_mean": 0.0027621358167380095, "signal/format_reward/group_zero_std_frac": 0.984375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0030171813908964396, "signal/frontier_aurc_reward/group_std_mean": 0.004486602451652289, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.4007543803891166e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.4007543803891166e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.2229921907186508, "signal/frontier_coverage_1/group_std_mean": 0.28949338793754575, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0039915600791573524, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0039915600791573524, "signal/frontier_coverage_10/centered_abs_mean": 0.2229921907186508, "signal/frontier_coverage_10/group_std_mean": 0.28949338793754575, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0039915600791573524, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0039915600791573524, "signal/frontier_coverage_15/centered_abs_mean": 0.2229921907186508, "signal/frontier_coverage_15/group_std_mean": 0.28949338793754575, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0039915600791573524, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0039915600791573524, "signal/frontier_coverage_20/centered_abs_mean": 0.2229921907186508, "signal/frontier_coverage_20/group_std_mean": 0.28949338793754575, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0039915600791573524, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0039915600791573524, "signal/frontier_coverage_25/centered_abs_mean": 0.2229921907186508, "signal/frontier_coverage_25/group_std_mean": 0.28949338793754575, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0039915600791573524, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0039915600791573524, "signal/frontier_coverage_5/centered_abs_mean": 0.2229921907186508, "signal/frontier_coverage_5/group_std_mean": 0.28949338793754575, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0039915600791573524, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0039915600791573524, "signal/frontier_ece_reward/centered_abs_mean": 0.05467732772231102, "signal/frontier_ece_reward/group_std_mean": 0.06959621906280518, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006834665965288878, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006834665965288878, "step": 70 }, { "calibration/aurc": 0.38145099671484345, "calibration/batch_distribution_entropy": 0.9351019623512027, "calibration/buffer_distribution_entropy": 0.9511017219264826, "calibration/confidence_entropy": 0.41021463710127815, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.1296875, "calibration/coverage@15%": 0.169140625, "calibration/coverage@20%": 0.197265625, "calibration/coverage@25%": 0.23046875, "calibration/coverage@30%": 0.278125, "calibration/coverage@5%": 0.0703125, "calibration/ece": 0.18824602337447136, "calibration/mean_confidence": 0.5374034910250385, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00048828125, "completions/max_length": 1337.2, "completions/max_terminated_length": 494.0, "completions/mean_length": 170.96103515625, "completions/mean_terminated_length": 170.29381103515624, "completions/min_length": 67.2, "completions/min_terminated_length": 67.2, "epoch": 0.24, "grad_norm": 0.0012356507359072566, "learning_rate": 1e-06, "loss": 0.0012, "num_tokens": 251166164.0, "reward": 0.9860649704933167, "reward_std": 0.12122494280338288, "rewards/accuracy_reward": 0.52705078125, "rewards/brier_reward": 0.7560766577720642, "rewards/confidence_uniqueness_reward": 0.9106106519699096, "rewards/format_reward": 0.99931640625, "rewards/frontier_aurc_reward": -0.003134048730134964, "rewards/frontier_coverage_1": 0.101060039550066, "rewards/frontier_coverage_10": 0.101060039550066, "rewards/frontier_coverage_15": 0.101060039550066, "rewards/frontier_coverage_20": 0.101060039550066, "rewards/frontier_coverage_25": 0.101060039550066, "rewards/frontier_coverage_5": 0.101060039550066, "rewards/frontier_ece_reward": 0.029981668666005136, "signal/accuracy_reward/centered_abs_mean": 0.150408935546875, "signal/accuracy_reward/group_std_mean": 0.1955954134464264, "signal/accuracy_reward/group_zero_std_frac": 0.453125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0752044677734375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0752044677734375, "signal/advantage_abs_mean": 0.09415247589349747, "signal/advantage_pre_scale_abs_mean": 0.09415247589349747, "signal/advantage_pre_scale_std": 0.14144977927207947, "signal/advantage_std": 0.14144977927207947, "signal/brier_reward/centered_abs_mean": 0.19640157520771026, "signal/brier_reward/group_std_mean": 0.24560691714286803, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.024550196900963783, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.024550196900963783, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.05692050457000732, "signal/confidence_uniqueness_reward/group_std_mean": 0.06995929852128029, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007115063071250915, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007115063071250915, "signal/format_reward/centered_abs_mean": 0.001324462890625, "signal/format_reward/group_std_mean": 0.0038669900968670845, "signal/format_reward/group_zero_std_frac": 0.978125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006622314453125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0006622314453125, "signal/frontier_aurc_reward/centered_abs_mean": 0.0031411519274115564, "signal/frontier_aurc_reward/group_std_mean": 0.004663504846394062, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.622661701636389e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.622661701636389e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.22368650436401366, "signal/frontier_coverage_1/group_std_mean": 0.29344227313995364, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004003988299518824, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004003988299518824, "signal/frontier_coverage_10/centered_abs_mean": 0.22368650436401366, "signal/frontier_coverage_10/group_std_mean": 0.29344227313995364, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004003988299518824, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004003988299518824, "signal/frontier_coverage_15/centered_abs_mean": 0.22368650436401366, "signal/frontier_coverage_15/group_std_mean": 0.29344227313995364, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004003988299518824, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004003988299518824, "signal/frontier_coverage_20/centered_abs_mean": 0.22368650436401366, "signal/frontier_coverage_20/group_std_mean": 0.29344227313995364, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004003988299518824, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004003988299518824, "signal/frontier_coverage_25/centered_abs_mean": 0.22368650436401366, "signal/frontier_coverage_25/group_std_mean": 0.29344227313995364, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004003988299518824, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004003988299518824, "signal/frontier_coverage_5/centered_abs_mean": 0.22368650436401366, "signal/frontier_coverage_5/group_std_mean": 0.29344227313995364, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004003988299518824, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004003988299518824, "signal/frontier_ece_reward/centered_abs_mean": 0.055852291733026506, "signal/frontier_ece_reward/group_std_mean": 0.07003621906042098, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006981536466628313, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006981536466628313, "step": 75 }, { "calibration/aurc": 0.28470953702454416, "calibration/batch_distribution_entropy": 0.9208122623181048, "calibration/buffer_distribution_entropy": 0.9522540519535683, "calibration/confidence_entropy": 0.3878620407373435, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.1923449730919765, "calibration/coverage@15%": 0.30060695939334636, "calibration/coverage@20%": 0.39478733488258316, "calibration/coverage@25%": 0.5057401235322896, "calibration/coverage@30%": 0.6214056384540118, "calibration/coverage@5%": 0.009375, "calibration/ece": 0.13667208102921785, "calibration/mean_confidence": 0.5453152864845467, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000390625, "completions/max_length": 933.6, "completions/max_terminated_length": 493.0, "completions/mean_length": 170.7779296875, "completions/mean_terminated_length": 170.2452423095703, "completions/min_length": 74.0, "completions/min_terminated_length": 74.0, "epoch": 0.256, "grad_norm": 0.0012276864144951105, "learning_rate": 1e-06, "loss": 0.0016, "num_tokens": 267969746.0, "reward": 0.9812070965766907, "reward_std": 0.1114748939871788, "rewards/accuracy_reward": 0.5060546875, "rewards/brier_reward": 0.7687627673149109, "rewards/confidence_uniqueness_reward": 0.9120404958724976, "rewards/format_reward": 0.9994140625, "rewards/frontier_aurc_reward": -0.003094889922067523, "rewards/frontier_coverage_1": 0.1360874891281128, "rewards/frontier_coverage_10": 0.1360874891281128, "rewards/frontier_coverage_15": 0.1360874891281128, "rewards/frontier_coverage_20": 0.1360874891281128, "rewards/frontier_coverage_25": 0.1360874891281128, "rewards/frontier_coverage_5": 0.1360874891281128, "rewards/frontier_ece_reward": 0.030495237931609152, "signal/accuracy_reward/centered_abs_mean": 0.13546142578125, "signal/accuracy_reward/group_std_mean": 0.1735977828502655, "signal/accuracy_reward/group_zero_std_frac": 0.521875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.067730712890625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.067730712890625, "signal/advantage_abs_mean": 0.08631241321563721, "signal/advantage_pre_scale_abs_mean": 0.08631241321563721, "signal/advantage_pre_scale_std": 0.1342590034008026, "signal/advantage_std": 0.1342590034008026, "signal/brier_reward/centered_abs_mean": 0.18553363680839538, "signal/brier_reward/group_std_mean": 0.2332346946001053, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.023191704601049423, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.023191704601049423, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.05944240242242813, "signal/confidence_uniqueness_reward/group_std_mean": 0.07294412925839425, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0074303003028035166, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0074303003028035166, "signal/format_reward/centered_abs_mean": 0.00113525390625, "signal/format_reward/group_std_mean": 0.0033145629335194827, "signal/format_reward/group_zero_std_frac": 0.98125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000567626953125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000567626953125, "signal/frontier_aurc_reward/centered_abs_mean": 0.003175769792869687, "signal/frontier_aurc_reward/group_std_mean": 0.004814452119171619, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.6846276856958865e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.6846276856958865e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.2149658679962158, "signal/frontier_coverage_1/group_std_mean": 0.27974134087562563, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003847888810560107, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003847888810560107, "signal/frontier_coverage_10/centered_abs_mean": 0.2149658679962158, "signal/frontier_coverage_10/group_std_mean": 0.27974134087562563, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003847888810560107, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003847888810560107, "signal/frontier_coverage_15/centered_abs_mean": 0.2149658679962158, "signal/frontier_coverage_15/group_std_mean": 0.27974134087562563, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003847888810560107, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003847888810560107, "signal/frontier_coverage_20/centered_abs_mean": 0.2149658679962158, "signal/frontier_coverage_20/group_std_mean": 0.27974134087562563, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003847888810560107, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003847888810560107, "signal/frontier_coverage_25/centered_abs_mean": 0.2149658679962158, "signal/frontier_coverage_25/group_std_mean": 0.27974134087562563, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003847888810560107, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003847888810560107, "signal/frontier_coverage_5/centered_abs_mean": 0.2149658679962158, "signal/frontier_coverage_5/group_std_mean": 0.27974134087562563, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003847888810560107, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003847888810560107, "signal/frontier_ece_reward/centered_abs_mean": 0.04882029145956039, "signal/frontier_ece_reward/group_std_mean": 0.06196781545877457, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006102536432445049, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006102536432445049, "step": 80 }, { "calibration/aurc": 0.3789066982157474, "calibration/batch_distribution_entropy": 0.946901981250825, "calibration/buffer_distribution_entropy": 0.9537924269959485, "calibration/confidence_entropy": 0.4054690863640535, "calibration/coverage@0%": 0.007421875, "calibration/coverage@1%": 0.007421875, "calibration/coverage@10%": 0.102734375, "calibration/coverage@15%": 0.149609375, "calibration/coverage@20%": 0.204296875, "calibration/coverage@25%": 0.254296875, "calibration/coverage@30%": 0.32421875, "calibration/coverage@5%": 0.039453125, "calibration/ece": 0.16809869559712115, "calibration/mean_confidence": 0.5157878198455872, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 513.2, "completions/max_terminated_length": 513.2, "completions/mean_length": 179.4818359375, "completions/mean_terminated_length": 179.4818359375, "completions/min_length": 74.2, "completions/min_terminated_length": 74.2, "epoch": 0.272, "grad_norm": 0.001049001351930201, "learning_rate": 1e-06, "loss": -0.0002, "num_tokens": 284773336.0, "reward": 0.9761500954627991, "reward_std": 0.105889230966568, "rewards/accuracy_reward": 0.4970703125, "rewards/brier_reward": 0.7569293856620789, "rewards/confidence_uniqueness_reward": 0.9251906633377075, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.0031034779269248245, "rewards/frontier_coverage_1": 0.13408799916505815, "rewards/frontier_coverage_10": 0.13408799916505815, "rewards/frontier_coverage_15": 0.13408799916505815, "rewards/frontier_coverage_20": 0.13408799916505815, "rewards/frontier_coverage_25": 0.13408799916505815, "rewards/frontier_coverage_5": 0.13408799916505815, "rewards/frontier_ece_reward": 0.024426154047250747, "signal/accuracy_reward/centered_abs_mean": 0.12740478515625, "signal/accuracy_reward/group_std_mean": 0.16262791752815248, "signal/accuracy_reward/group_zero_std_frac": 0.553125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.063702392578125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.063702392578125, "signal/advantage_abs_mean": 0.08320691287517548, "signal/advantage_pre_scale_abs_mean": 0.08320691287517548, "signal/advantage_pre_scale_std": 0.12677900344133378, "signal/advantage_std": 0.12677900344133378, "signal/brier_reward/centered_abs_mean": 0.1897197872400284, "signal/brier_reward/group_std_mean": 0.23877674639225005, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02371497340500355, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.02371497340500355, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04770283699035645, "signal/confidence_uniqueness_reward/group_std_mean": 0.058389055728912356, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005962854623794556, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005962854623794556, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.002825378580018878, "signal/frontier_aurc_reward/group_std_mean": 0.004264938598498702, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.0574273336678745e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.0574273336678745e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.22668876647949218, "signal/frontier_coverage_1/group_std_mean": 0.29389293789863585, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004057728871703148, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004057728871703148, "signal/frontier_coverage_10/centered_abs_mean": 0.22668876647949218, "signal/frontier_coverage_10/group_std_mean": 0.29389293789863585, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004057728871703148, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004057728871703148, "signal/frontier_coverage_15/centered_abs_mean": 0.22668876647949218, "signal/frontier_coverage_15/group_std_mean": 0.29389293789863585, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004057728871703148, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004057728871703148, "signal/frontier_coverage_20/centered_abs_mean": 0.22668876647949218, "signal/frontier_coverage_20/group_std_mean": 0.29389293789863585, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004057728871703148, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004057728871703148, "signal/frontier_coverage_25/centered_abs_mean": 0.22668876647949218, "signal/frontier_coverage_25/group_std_mean": 0.29389293789863585, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004057728871703148, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004057728871703148, "signal/frontier_coverage_5/centered_abs_mean": 0.22668876647949218, "signal/frontier_coverage_5/group_std_mean": 0.29389293789863585, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004057728871703148, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004057728871703148, "signal/frontier_ece_reward/centered_abs_mean": 0.04503390789031982, "signal/frontier_ece_reward/group_std_mean": 0.05716151520609856, "signal/frontier_ece_reward/group_zero_std_frac": 0.003125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005629238486289978, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005629238486289978, "step": 85 }, { "calibration/aurc": 0.3297805723752279, "calibration/batch_distribution_entropy": 0.9416902309925609, "calibration/buffer_distribution_entropy": 0.9557326159162518, "calibration/confidence_entropy": 0.4033264689268282, "calibration/coverage@0%": 0.006653620352250489, "calibration/coverage@1%": 0.006653620352250489, "calibration/coverage@10%": 0.006653620352250489, "calibration/coverage@15%": 0.08688845401174168, "calibration/coverage@20%": 0.2244228534735812, "calibration/coverage@25%": 0.28773391634050877, "calibration/coverage@30%": 0.37174581090998043, "calibration/coverage@5%": 0.006653620352250489, "calibration/ece": 0.1285960999935037, "calibration/mean_confidence": 0.5267547320265696, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 991.8, "completions/max_terminated_length": 555.6, "completions/mean_length": 178.51474609375, "completions/mean_terminated_length": 178.11636047363282, "completions/min_length": 77.4, "completions/min_terminated_length": 77.4, "epoch": 0.288, "grad_norm": 0.001152192009612918, "learning_rate": 1e-06, "loss": 0.0011, "num_tokens": 301559503.0, "reward": 0.9819895029067993, "reward_std": 0.10433640331029892, "rewards/accuracy_reward": 0.50400390625, "rewards/brier_reward": 0.7620156645774842, "rewards/confidence_uniqueness_reward": 0.9369817614555359, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.002946482878178358, "rewards/frontier_coverage_1": 0.13725561499595643, "rewards/frontier_coverage_10": 0.13725561499595643, "rewards/frontier_coverage_15": 0.13725561499595643, "rewards/frontier_coverage_20": 0.13725561499595643, "rewards/frontier_coverage_25": 0.13725561499595643, "rewards/frontier_coverage_5": 0.13725561499595643, "rewards/frontier_ece_reward": 0.024566837400197983, "signal/accuracy_reward/centered_abs_mean": 0.129986572265625, "signal/accuracy_reward/group_std_mean": 0.17214542329311372, "signal/accuracy_reward/group_zero_std_frac": 0.50625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0649932861328125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0649932861328125, "signal/advantage_abs_mean": 0.07953081279993057, "signal/advantage_pre_scale_abs_mean": 0.07953081279993057, "signal/advantage_pre_scale_std": 0.12358220815658569, "signal/advantage_std": 0.12358220815658569, "signal/brier_reward/centered_abs_mean": 0.1853651374578476, "signal/brier_reward/group_std_mean": 0.23431913554668427, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02317064218223095, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.02317064218223095, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.037301665544509886, "signal/confidence_uniqueness_reward/group_std_mean": 0.04643301069736481, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004662708193063736, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004662708193063736, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.0025432564318180082, "signal/frontier_aurc_reward/group_std_mean": 0.003974350774660707, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.5524286542786284e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.5524286542786284e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.232001656293869, "signal/frontier_coverage_1/group_std_mean": 0.2984708070755005, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004152829479426146, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004152829479426146, "signal/frontier_coverage_10/centered_abs_mean": 0.232001656293869, "signal/frontier_coverage_10/group_std_mean": 0.2984708070755005, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004152829479426146, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004152829479426146, "signal/frontier_coverage_15/centered_abs_mean": 0.232001656293869, "signal/frontier_coverage_15/group_std_mean": 0.2984708070755005, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004152829479426146, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004152829479426146, "signal/frontier_coverage_20/centered_abs_mean": 0.232001656293869, "signal/frontier_coverage_20/group_std_mean": 0.2984708070755005, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004152829479426146, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004152829479426146, "signal/frontier_coverage_25/centered_abs_mean": 0.232001656293869, "signal/frontier_coverage_25/group_std_mean": 0.2984708070755005, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004152829479426146, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004152829479426146, "signal/frontier_coverage_5/centered_abs_mean": 0.232001656293869, "signal/frontier_coverage_5/group_std_mean": 0.2984708070755005, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004152829479426146, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004152829479426146, "signal/frontier_ece_reward/centered_abs_mean": 0.04061479941010475, "signal/frontier_ece_reward/group_std_mean": 0.051888493448495866, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005076849926263094, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005076849926263094, "step": 90 }, { "calibration/aurc": 0.29826600631964345, "calibration/batch_distribution_entropy": 0.9458930250074676, "calibration/buffer_distribution_entropy": 0.9569944190219012, "calibration/confidence_entropy": 0.41124185655389256, "calibration/coverage@0%": 0.00859375, "calibration/coverage@1%": 0.00859375, "calibration/coverage@10%": 0.07421875, "calibration/coverage@15%": 0.15625, "calibration/coverage@20%": 0.2375, "calibration/coverage@25%": 0.36328125, "calibration/coverage@30%": 0.48671875, "calibration/coverage@5%": 0.025390625, "calibration/ece": 0.1401060364598731, "calibration/mean_confidence": 0.5169087501931073, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 765.4, "completions/max_terminated_length": 564.4, "completions/mean_length": 184.74921875, "completions/mean_terminated_length": 184.61671752929686, "completions/min_length": 77.2, "completions/min_terminated_length": 77.2, "epoch": 0.304, "grad_norm": 0.0009637068142183125, "learning_rate": 1e-06, "loss": 0.0005, "num_tokens": 318381287.0, "reward": 0.9803183794021606, "reward_std": 0.09998638182878494, "rewards/accuracy_reward": 0.50380859375, "rewards/brier_reward": 0.7553321838378906, "rewards/confidence_uniqueness_reward": 0.9388602018356323, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.0029232859145849944, "rewards/frontier_coverage_1": 0.13151973783969878, "rewards/frontier_coverage_10": 0.13151973783969878, "rewards/frontier_coverage_15": 0.13151973783969878, "rewards/frontier_coverage_20": 0.13151973783969878, "rewards/frontier_coverage_25": 0.13151973783969878, "rewards/frontier_coverage_5": 0.13151973783969878, "rewards/frontier_ece_reward": 0.020927964150905608, "signal/accuracy_reward/centered_abs_mean": 0.127679443359375, "signal/accuracy_reward/group_std_mean": 0.17264682054519653, "signal/accuracy_reward/group_zero_std_frac": 0.4875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0638397216796875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0638397216796875, "signal/advantage_abs_mean": 0.07606031596660615, "signal/advantage_pre_scale_abs_mean": 0.07606031596660615, "signal/advantage_pre_scale_std": 0.11618665158748627, "signal/advantage_std": 0.11618665158748627, "signal/brier_reward/centered_abs_mean": 0.1843687653541565, "signal/brier_reward/group_std_mean": 0.23427461981773376, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.023046095669269562, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.023046095669269562, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03518189340829849, "signal/confidence_uniqueness_reward/group_std_mean": 0.04454438164830208, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004397736676037311, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004397736676037311, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.0023705217288807036, "signal/frontier_aurc_reward/group_std_mean": 0.0036838185507804154, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.243233852321282e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.243233852321282e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.2418591320514679, "signal/frontier_coverage_1/group_std_mean": 0.31356959939002993, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004329278413206339, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004329278413206339, "signal/frontier_coverage_10/centered_abs_mean": 0.2418591320514679, "signal/frontier_coverage_10/group_std_mean": 0.31356959939002993, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004329278413206339, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004329278413206339, "signal/frontier_coverage_15/centered_abs_mean": 0.2418591320514679, "signal/frontier_coverage_15/group_std_mean": 0.31356959939002993, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004329278413206339, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004329278413206339, "signal/frontier_coverage_20/centered_abs_mean": 0.2418591320514679, "signal/frontier_coverage_20/group_std_mean": 0.31356959939002993, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004329278413206339, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004329278413206339, "signal/frontier_coverage_25/centered_abs_mean": 0.2418591320514679, "signal/frontier_coverage_25/group_std_mean": 0.31356959939002993, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004329278413206339, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004329278413206339, "signal/frontier_coverage_5/centered_abs_mean": 0.2418591320514679, "signal/frontier_coverage_5/group_std_mean": 0.31356959939002993, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004329278413206339, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004329278413206339, "signal/frontier_ece_reward/centered_abs_mean": 0.03723434209823608, "signal/frontier_ece_reward/group_std_mean": 0.04741183742880821, "signal/frontier_ece_reward/group_zero_std_frac": 0.003125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00465429276227951, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00465429276227951, "step": 95 }, { "calibration/aurc": 0.2544567539913821, "calibration/batch_distribution_entropy": 0.9350166127572491, "calibration/buffer_distribution_entropy": 0.958189052231246, "calibration/confidence_entropy": 0.39907110397213336, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.2701145119863014, "calibration/coverage@15%": 0.37330219545009785, "calibration/coverage@20%": 0.45343153742661446, "calibration/coverage@25%": 0.509323018590998, "calibration/coverage@30%": 0.5761764615949119, "calibration/coverage@5%": 0.11763851516634052, "calibration/ece": 0.14744758144939113, "calibration/mean_confidence": 0.5285503908524433, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0005859375, "completions/max_length": 1123.4, "completions/max_terminated_length": 705.4, "completions/mean_length": 187.6494140625, "completions/mean_terminated_length": 186.8601806640625, "completions/min_length": 81.0, "completions/min_terminated_length": 81.0, "epoch": 0.32, "grad_norm": 0.0008596840780228376, "learning_rate": 1e-06, "loss": 0.0019, "num_tokens": 335391521.0, "reward": 0.9921112775802612, "reward_std": 0.09160036891698838, "rewards/accuracy_reward": 0.51875, "rewards/brier_reward": 0.7743848919868469, "rewards/confidence_uniqueness_reward": 0.9427272796630859, "rewards/format_reward": 0.99931640625, "rewards/frontier_aurc_reward": -0.0026898517040535807, "rewards/frontier_coverage_1": 0.1416488030925393, "rewards/frontier_coverage_10": 0.1416488030925393, "rewards/frontier_coverage_15": 0.1416488030925393, "rewards/frontier_coverage_20": 0.1416488030925393, "rewards/frontier_coverage_25": 0.1416488030925393, "rewards/frontier_coverage_5": 0.1416488030925393, "rewards/frontier_ece_reward": 0.026192883402109145, "signal/accuracy_reward/centered_abs_mean": 0.0979736328125, "signal/accuracy_reward/group_std_mean": 0.1366899386048317, "signal/accuracy_reward/group_zero_std_frac": 0.58125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04898681640625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04898681640625, "signal/advantage_abs_mean": 0.06886067688465118, "signal/advantage_pre_scale_abs_mean": 0.06886067688465118, "signal/advantage_pre_scale_std": 0.11252744793891907, "signal/advantage_std": 0.11252744793891907, "signal/brier_reward/centered_abs_mean": 0.16976939141750336, "signal/brier_reward/group_std_mean": 0.21671704649925233, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02122117392718792, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.02122117392718792, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03321526050567627, "signal/confidence_uniqueness_reward/group_std_mean": 0.04269362464547157, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0041519075632095335, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0041519075632095335, "signal/format_reward/centered_abs_mean": 0.001300048828125, "signal/format_reward/group_std_mean": 0.0031943732872605326, "signal/format_reward/group_zero_std_frac": 0.984375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006500244140625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0006500244140625, "signal/frontier_aurc_reward/centered_abs_mean": 0.0025342844892293215, "signal/frontier_aurc_reward/group_std_mean": 0.0038999815471470354, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.536369087873027e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.536369087873027e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.20525244176387786, "signal/frontier_coverage_1/group_std_mean": 0.2671793639659882, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0036740186624228954, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0036740186624228954, "signal/frontier_coverage_10/centered_abs_mean": 0.20525244176387786, "signal/frontier_coverage_10/group_std_mean": 0.2671793639659882, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0036740186624228954, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0036740186624228954, "signal/frontier_coverage_15/centered_abs_mean": 0.20525244176387786, "signal/frontier_coverage_15/group_std_mean": 0.2671793639659882, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0036740186624228954, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0036740186624228954, "signal/frontier_coverage_20/centered_abs_mean": 0.20525244176387786, "signal/frontier_coverage_20/group_std_mean": 0.2671793639659882, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0036740186624228954, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0036740186624228954, "signal/frontier_coverage_25/centered_abs_mean": 0.20525244176387786, "signal/frontier_coverage_25/group_std_mean": 0.2671793639659882, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0036740186624228954, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0036740186624228954, "signal/frontier_coverage_5/centered_abs_mean": 0.20525244176387786, "signal/frontier_coverage_5/group_std_mean": 0.2671793639659882, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0036740186624228954, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0036740186624228954, "signal/frontier_ece_reward/centered_abs_mean": 0.037545930594205856, "signal/frontier_ece_reward/group_std_mean": 0.04693235754966736, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004693241324275732, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004693241324275732, "step": 100 }, { "epoch": 0.32, "eval_calibration/aurc": 0.4974161459569359, "eval_calibration/batch_distribution_entropy": 0.8600728019210881, "eval_calibration/buffer_distribution_entropy": 0.9585032256106852, "eval_calibration/confidence_entropy": 0.39179126853215673, "eval_calibration/coverage@0%": 0.0625, "eval_calibration/coverage@1%": 0.0625, "eval_calibration/coverage@10%": 0.0625, "eval_calibration/coverage@15%": 0.078125, "eval_calibration/coverage@20%": 0.140625, "eval_calibration/coverage@25%": 0.1796875, "eval_calibration/coverage@30%": 0.265625, "eval_calibration/coverage@5%": 0.0625, "eval_calibration/ece": 0.20821445704912134, "eval_calibration/mean_confidence": 0.45275429295087866, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 348.75, "eval_completions/max_terminated_length": 348.75, "eval_completions/mean_length": 189.11557006835938, "eval_completions/mean_terminated_length": 189.11557006835938, "eval_completions/min_length": 98.75, "eval_completions/min_terminated_length": 98.75, "eval_loss": 0.0, "eval_num_tokens": 335391521.0, "eval_reward": 0.9335773140192032, "eval_reward_std": 0.22667960077524185, "eval_rewards/accuracy_reward": 0.40234375, "eval_rewards/brier_reward": 0.7676407843828201, "eval_rewards/confidence_uniqueness_reward": 0.89111328125, "eval_rewards/format_reward": 1.0, "eval_rewards/frontier_aurc_reward": -0.0035294744884595275, "eval_rewards/frontier_coverage_1": 0.21452518552541733, "eval_rewards/frontier_coverage_10": 0.21452518552541733, "eval_rewards/frontier_coverage_15": 0.21452518552541733, "eval_rewards/frontier_coverage_20": 0.21452518552541733, "eval_rewards/frontier_coverage_25": 0.21452518552541733, "eval_rewards/frontier_coverage_5": 0.21452518552541733, "eval_rewards/frontier_ece_reward": 0.01667470997199416, "eval_runtime": 18.5202, "eval_samples_per_second": 26.998, "eval_signal/accuracy_reward/centered_abs_mean": 0.468994140625, "eval_signal/accuracy_reward/group_std_mean": 0.49156785011291504, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2344970703125, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2344970703125, "eval_signal/advantage_abs_mean": 0.20321417972445488, "eval_signal/advantage_pre_scale_abs_mean": 0.20321417972445488, "eval_signal/advantage_pre_scale_std": 0.22419220209121704, "eval_signal/advantage_std": 0.22419220209121704, "eval_signal/brier_reward/centered_abs_mean": 0.24414894357323647, "eval_signal/brier_reward/group_std_mean": 0.2980574741959572, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.030518617946654558, "eval_signal/brier_reward/weight": 0.125, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.030518617946654558, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0486907958984375, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.05951074603945017, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0060863494873046875, "eval_signal/confidence_uniqueness_reward/weight": 0.125, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0060863494873046875, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0035710909869521856, "eval_signal/frontier_aurc_reward/group_std_mean": 0.005775783443823457, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.392252726072911e-05, "eval_signal/frontier_aurc_reward/weight": 0.017899999395012856, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.392252726072911e-05, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.4047232046723366, "eval_signal/frontier_coverage_1/group_std_mean": 0.4975067600607872, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.007244544918648899, "eval_signal/frontier_coverage_1/weight": 0.017899999395012856, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.007244544918648899, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.4047232046723366, "eval_signal/frontier_coverage_10/group_std_mean": 0.4975067600607872, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.007244544918648899, "eval_signal/frontier_coverage_10/weight": 0.017899999395012856, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.007244544918648899, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.4047232046723366, "eval_signal/frontier_coverage_15/group_std_mean": 0.4975067600607872, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.007244544918648899, "eval_signal/frontier_coverage_15/weight": 0.017899999395012856, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.007244544918648899, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.4047232046723366, "eval_signal/frontier_coverage_20/group_std_mean": 0.4975067600607872, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.007244544918648899, "eval_signal/frontier_coverage_20/weight": 0.017899999395012856, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.007244544918648899, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.4047232046723366, "eval_signal/frontier_coverage_25/group_std_mean": 0.4975067600607872, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.007244544918648899, "eval_signal/frontier_coverage_25/weight": 0.017899999395012856, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.007244544918648899, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.4047232046723366, "eval_signal/frontier_coverage_5/group_std_mean": 0.4975067600607872, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.007244544918648899, "eval_signal/frontier_coverage_5/weight": 0.017899999395012856, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.007244544918648899, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.04583758395165205, "eval_signal/frontier_ece_reward/group_std_mean": 0.06471480429172516, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005729697993956506, "eval_signal/frontier_ece_reward/weight": 0.125, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005729697993956506, "eval_steps_per_second": 0.216, "step": 100 }, { "calibration/aurc": 0.2958493535528611, "calibration/batch_distribution_entropy": 0.9413453167838378, "calibration/buffer_distribution_entropy": 0.9608605051679421, "calibration/confidence_entropy": 0.4114441599205099, "calibration/coverage@0%": 0.011328125, "calibration/coverage@1%": 0.011328125, "calibration/coverage@10%": 0.06491484222113503, "calibration/coverage@15%": 0.13764829990215263, "calibration/coverage@20%": 0.22517505503913893, "calibration/coverage@25%": 0.3994274400684931, "calibration/coverage@30%": 0.5850224743150685, "calibration/coverage@5%": 0.011328125, "calibration/ece": 0.1434243952954204, "calibration/mean_confidence": 0.49109226134555123, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000390625, "completions/max_length": 1100.6, "completions/max_terminated_length": 520.0, "completions/mean_length": 190.394921875, "completions/mean_terminated_length": 189.8685272216797, "completions/min_length": 82.8, "completions/min_terminated_length": 82.8, "epoch": 0.336, "grad_norm": 0.0011786021059378982, "learning_rate": 1e-06, "loss": 0.0017, "num_tokens": 352063597.0, "reward": 0.9939133524894714, "reward_std": 0.09802540838718414, "rewards/accuracy_reward": 0.526171875, "rewards/brier_reward": 0.7703649401664734, "rewards/confidence_uniqueness_reward": 0.9453340649604798, "rewards/format_reward": 0.99951171875, "rewards/frontier_aurc_reward": -0.002737466990947723, "rewards/frontier_coverage_1": 0.1277718111872673, "rewards/frontier_coverage_10": 0.1277718111872673, "rewards/frontier_coverage_15": 0.1277718111872673, "rewards/frontier_coverage_20": 0.1277718111872673, "rewards/frontier_coverage_25": 0.1277718111872673, "rewards/frontier_coverage_5": 0.1277718111872673, "rewards/frontier_ece_reward": 0.02348385229706764, "signal/accuracy_reward/centered_abs_mean": 0.11649169921875, "signal/accuracy_reward/group_std_mean": 0.1568697527050972, "signal/accuracy_reward/group_zero_std_frac": 0.54375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.058245849609375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.058245849609375, "signal/advantage_abs_mean": 0.07433497905731201, "signal/advantage_pre_scale_abs_mean": 0.07433497905731201, "signal/advantage_pre_scale_std": 0.11875344961881637, "signal/advantage_std": 0.11875344961881637, "signal/brier_reward/centered_abs_mean": 0.17236365377902985, "signal/brier_reward/group_std_mean": 0.21735928058624268, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02154545672237873, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.02154545672237873, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.030726969614624976, "signal/confidence_uniqueness_reward/group_std_mean": 0.03968273177742958, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003840871201828122, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003840871201828122, "signal/format_reward/centered_abs_mean": 0.000946044921875, "signal/format_reward/group_std_mean": 0.0027621358167380095, "signal/format_reward/group_zero_std_frac": 0.984375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0025001152884215117, "signal/frontier_aurc_reward/group_std_mean": 0.00392577862367034, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.4752060784958306e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.4752060784958306e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.21679833829402922, "signal/frontier_coverage_1/group_std_mean": 0.2767444133758545, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003880690271034837, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003880690271034837, "signal/frontier_coverage_10/centered_abs_mean": 0.21679833829402922, "signal/frontier_coverage_10/group_std_mean": 0.2767444133758545, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003880690271034837, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003880690271034837, "signal/frontier_coverage_15/centered_abs_mean": 0.21679833829402922, "signal/frontier_coverage_15/group_std_mean": 0.2767444133758545, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003880690271034837, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003880690271034837, "signal/frontier_coverage_20/centered_abs_mean": 0.21679833829402922, "signal/frontier_coverage_20/group_std_mean": 0.2767444133758545, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003880690271034837, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003880690271034837, "signal/frontier_coverage_25/centered_abs_mean": 0.21679833829402922, "signal/frontier_coverage_25/group_std_mean": 0.2767444133758545, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003880690271034837, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003880690271034837, "signal/frontier_coverage_5/centered_abs_mean": 0.21679833829402922, "signal/frontier_coverage_5/group_std_mean": 0.2767444133758545, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003880690271034837, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003880690271034837, "signal/frontier_ece_reward/centered_abs_mean": 0.03399265930056572, "signal/frontier_ece_reward/group_std_mean": 0.043000844120979306, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004249082412570715, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004249082412570715, "step": 105 }, { "calibration/aurc": 0.33002620817581574, "calibration/batch_distribution_entropy": 0.8972123286813798, "calibration/buffer_distribution_entropy": 0.9671293966456596, "calibration/confidence_entropy": 0.37335880071175015, "calibration/coverage@0%": 0.026593077299412914, "calibration/coverage@1%": 0.026593077299412914, "calibration/coverage@10%": 0.14979283879647748, "calibration/coverage@15%": 0.26584056996086103, "calibration/coverage@20%": 0.3733717588062623, "calibration/coverage@25%": 0.42732387475538164, "calibration/coverage@30%": 0.49458628913894326, "calibration/coverage@5%": 0.047296202299412914, "calibration/ece": 0.14116460423344823, "calibration/mean_confidence": 0.43884960142619966, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00048828125, "completions/max_length": 1208.4, "completions/max_terminated_length": 605.8, "completions/mean_length": 191.46328125, "completions/mean_terminated_length": 190.8067199707031, "completions/min_length": 84.4, "completions/min_terminated_length": 84.4, "epoch": 0.352, "grad_norm": 0.0026657464914023876, "learning_rate": 1e-06, "loss": 0.0015, "num_tokens": 369284597.0, "reward": 0.9673644065856933, "reward_std": 0.09604953676462173, "rewards/accuracy_reward": 0.464453125, "rewards/brier_reward": 0.7727057933807373, "rewards/confidence_uniqueness_reward": 0.9422548532485961, "rewards/format_reward": 0.99921875, "rewards/frontier_aurc_reward": -0.0032384898513555527, "rewards/frontier_coverage_1": 0.17688873410224915, "rewards/frontier_coverage_10": 0.17688873410224915, "rewards/frontier_coverage_15": 0.17688873410224915, "rewards/frontier_coverage_20": 0.17688873410224915, "rewards/frontier_coverage_25": 0.17688873410224915, "rewards/frontier_coverage_5": 0.17688873410224915, "rewards/frontier_ece_reward": 0.017748223431408406, "signal/accuracy_reward/centered_abs_mean": 0.11265869140625, "signal/accuracy_reward/group_std_mean": 0.15127451121807098, "signal/accuracy_reward/group_zero_std_frac": 0.55625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.056329345703125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.056329345703125, "signal/advantage_abs_mean": 0.07229470312595368, "signal/advantage_pre_scale_abs_mean": 0.07229470312595368, "signal/advantage_pre_scale_std": 0.11684101819992065, "signal/advantage_std": 0.11684101819992065, "signal/brier_reward/centered_abs_mean": 0.16957641541957855, "signal/brier_reward/group_std_mean": 0.2178028106689453, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02119705192744732, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.02119705192744732, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03406139500439167, "signal/confidence_uniqueness_reward/group_std_mean": 0.044897759705781935, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004257674375548959, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004257674375548959, "signal/format_reward/centered_abs_mean": 0.001513671875, "signal/format_reward/group_std_mean": 0.004419417260214687, "signal/format_reward/group_zero_std_frac": 0.975, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0007568359375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0007568359375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0028988351114094257, "signal/frontier_aurc_reward/group_std_mean": 0.004550885502249002, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.188914583413862e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.188914583413862e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.21616642773151398, "signal/frontier_coverage_1/group_std_mean": 0.27745549082756044, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003869378939270973, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003869378939270973, "signal/frontier_coverage_10/centered_abs_mean": 0.21616642773151398, "signal/frontier_coverage_10/group_std_mean": 0.27745549082756044, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003869378939270973, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003869378939270973, "signal/frontier_coverage_15/centered_abs_mean": 0.21616642773151398, "signal/frontier_coverage_15/group_std_mean": 0.27745549082756044, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003869378939270973, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003869378939270973, "signal/frontier_coverage_20/centered_abs_mean": 0.21616642773151398, "signal/frontier_coverage_20/group_std_mean": 0.27745549082756044, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003869378939270973, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003869378939270973, "signal/frontier_coverage_25/centered_abs_mean": 0.21616642773151398, "signal/frontier_coverage_25/group_std_mean": 0.27745549082756044, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003869378939270973, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003869378939270973, "signal/frontier_coverage_5/centered_abs_mean": 0.21616642773151398, "signal/frontier_coverage_5/group_std_mean": 0.27745549082756044, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003869378939270973, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003869378939270973, "signal/frontier_ece_reward/centered_abs_mean": 0.028543695434927942, "signal/frontier_ece_reward/group_std_mean": 0.03598736748099327, "signal/frontier_ece_reward/group_zero_std_frac": 0.003125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0035679619293659927, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0035679619293659927, "step": 110 }, { "calibration/aurc": 0.34684431490162665, "calibration/batch_distribution_entropy": 0.9249764330331744, "calibration/buffer_distribution_entropy": 0.9721049923704769, "calibration/confidence_entropy": 0.3966501657494727, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.048828125, "calibration/coverage@15%": 0.083984375, "calibration/coverage@20%": 0.240234375, "calibration/coverage@25%": 0.347265625, "calibration/coverage@30%": 0.48951122186888457, "calibration/coverage@5%": 0.0, "calibration/ece": 0.14233482628214694, "calibration/mean_confidence": 0.5210622953382034, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 855.8, "completions/max_terminated_length": 648.0, "completions/mean_length": 191.449609375, "completions/mean_terminated_length": 191.1877471923828, "completions/min_length": 83.2, "completions/min_terminated_length": 83.2, "epoch": 0.368, "grad_norm": 0.0011664318153634667, "learning_rate": 1e-06, "loss": 0.0013, "num_tokens": 386310513.0, "reward": 0.980566680431366, "reward_std": 0.0891783744096756, "rewards/accuracy_reward": 0.49208984375, "rewards/brier_reward": 0.7773085832595825, "rewards/confidence_uniqueness_reward": 0.9470725178718566, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.0032397733069956304, "rewards/frontier_coverage_1": 0.15876154750585555, "rewards/frontier_coverage_10": 0.15876154750585555, "rewards/frontier_coverage_15": 0.15876154750585555, "rewards/frontier_coverage_20": 0.15876154750585555, "rewards/frontier_coverage_25": 0.15876154750585555, "rewards/frontier_coverage_5": 0.15876154750585555, "rewards/frontier_ece_reward": 0.016630425490438937, "signal/accuracy_reward/centered_abs_mean": 0.102740478515625, "signal/accuracy_reward/group_std_mean": 0.13781636953353882, "signal/accuracy_reward/group_zero_std_frac": 0.596875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0513702392578125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0513702392578125, "signal/advantage_abs_mean": 0.06816617250442505, "signal/advantage_pre_scale_abs_mean": 0.06816617250442505, "signal/advantage_pre_scale_std": 0.11163422465324402, "signal/advantage_std": 0.11163422465324402, "signal/brier_reward/centered_abs_mean": 0.16006246507167815, "signal/brier_reward/group_std_mean": 0.20494329929351807, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02000780813395977, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.02000780813395977, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.028726159036159514, "signal/confidence_uniqueness_reward/group_std_mean": 0.03694523498415947, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0035907698795199392, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0035907698795199392, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0028500501066446304, "signal/frontier_aurc_reward/group_std_mean": 0.004479775950312614, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.1015896315220746e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.1015896315220746e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.19571449756622314, "signal/frontier_coverage_1/group_std_mean": 0.2562991797924042, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0035032893996685744, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0035032893996685744, "signal/frontier_coverage_10/centered_abs_mean": 0.19571449756622314, "signal/frontier_coverage_10/group_std_mean": 0.2562991797924042, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0035032893996685744, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0035032893996685744, "signal/frontier_coverage_15/centered_abs_mean": 0.19571449756622314, "signal/frontier_coverage_15/group_std_mean": 0.2562991797924042, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0035032893996685744, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0035032893996685744, "signal/frontier_coverage_20/centered_abs_mean": 0.19571449756622314, "signal/frontier_coverage_20/group_std_mean": 0.2562991797924042, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0035032893996685744, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0035032893996685744, "signal/frontier_coverage_25/centered_abs_mean": 0.19571449756622314, "signal/frontier_coverage_25/group_std_mean": 0.2562991797924042, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0035032893996685744, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0035032893996685744, "signal/frontier_coverage_5/centered_abs_mean": 0.19571449756622314, "signal/frontier_coverage_5/group_std_mean": 0.2562991797924042, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0035032893996685744, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0035032893996685744, "signal/frontier_ece_reward/centered_abs_mean": 0.023004084080457687, "signal/frontier_ece_reward/group_std_mean": 0.028804820030927658, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002875510510057211, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002875510510057211, "step": 115 }, { "calibration/aurc": 0.33882551551583673, "calibration/batch_distribution_entropy": 0.9004682009504199, "calibration/buffer_distribution_entropy": 0.9747984378991879, "calibration/confidence_entropy": 0.3752828914999894, "calibration/coverage@0%": 0.03828125, "calibration/coverage@1%": 0.03828125, "calibration/coverage@10%": 0.166796875, "calibration/coverage@15%": 0.23203125, "calibration/coverage@20%": 0.294921875, "calibration/coverage@25%": 0.3328125, "calibration/coverage@30%": 0.390234375, "calibration/coverage@5%": 0.063671875, "calibration/ece": 0.14781379384913512, "calibration/mean_confidence": 0.46431405798825054, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00048828125, "completions/max_length": 1023.8, "completions/max_terminated_length": 643.2, "completions/mean_length": 190.45791015625, "completions/mean_terminated_length": 189.801416015625, "completions/min_length": 84.2, "completions/min_terminated_length": 84.2, "epoch": 0.384, "grad_norm": 0.0009418850531801581, "learning_rate": 1e-06, "loss": 0.0013, "num_tokens": 403117314.0, "reward": 0.9945974946022034, "reward_std": 0.08855005800724029, "rewards/accuracy_reward": 0.52099609375, "rewards/brier_reward": 0.7850892663002014, "rewards/confidence_uniqueness_reward": 0.9485597133636474, "rewards/format_reward": 0.99951171875, "rewards/frontier_aurc_reward": -0.0032369979191571472, "rewards/frontier_coverage_1": 0.1466797597706318, "rewards/frontier_coverage_10": 0.1466797597706318, "rewards/frontier_coverage_15": 0.1466797597706318, "rewards/frontier_coverage_20": 0.1466797597706318, "rewards/frontier_coverage_25": 0.1466797597706318, "rewards/frontier_coverage_5": 0.1466797597706318, "rewards/frontier_ece_reward": 0.01553578432649374, "signal/accuracy_reward/centered_abs_mean": 0.106524658203125, "signal/accuracy_reward/group_std_mean": 0.14518197476863862, "signal/accuracy_reward/group_zero_std_frac": 0.565625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0532623291015625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0532623291015625, "signal/advantage_abs_mean": 0.06687061563134193, "signal/advantage_pre_scale_abs_mean": 0.06687061563134193, "signal/advantage_pre_scale_std": 0.11217147409915924, "signal/advantage_std": 0.11217147409915924, "signal/brier_reward/centered_abs_mean": 0.1526328980922699, "signal/brier_reward/group_std_mean": 0.19572176933288574, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019079112261533738, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.019079112261533738, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.027018361538648606, "signal/confidence_uniqueness_reward/group_std_mean": 0.03468450009822845, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0033772951923310758, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0033772951923310758, "signal/format_reward/centered_abs_mean": 0.000872802734375, "signal/format_reward/group_std_mean": 0.0016024607699364423, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004364013671875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0004364013671875, "signal/frontier_aurc_reward/centered_abs_mean": 0.003183392807841301, "signal/frontier_aurc_reward/group_std_mean": 0.004988422710448503, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.6982728710863737e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.6982728710863737e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.18951932489871978, "signal/frontier_coverage_1/group_std_mean": 0.24623486995697022, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0033923957496881487, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0033923957496881487, "signal/frontier_coverage_10/centered_abs_mean": 0.18951932489871978, "signal/frontier_coverage_10/group_std_mean": 0.24623486995697022, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0033923957496881487, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0033923957496881487, "signal/frontier_coverage_15/centered_abs_mean": 0.18951932489871978, "signal/frontier_coverage_15/group_std_mean": 0.24623486995697022, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0033923957496881487, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0033923957496881487, "signal/frontier_coverage_20/centered_abs_mean": 0.18951932489871978, "signal/frontier_coverage_20/group_std_mean": 0.24623486995697022, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0033923957496881487, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0033923957496881487, "signal/frontier_coverage_25/centered_abs_mean": 0.18951932489871978, "signal/frontier_coverage_25/group_std_mean": 0.24623486995697022, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0033923957496881487, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0033923957496881487, "signal/frontier_coverage_5/centered_abs_mean": 0.18951932489871978, "signal/frontier_coverage_5/group_std_mean": 0.24623486995697022, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0033923957496881487, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0033923957496881487, "signal/frontier_ece_reward/centered_abs_mean": 0.019004416465759278, "signal/frontier_ece_reward/group_std_mean": 0.02362184412777424, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0023755520582199098, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0023755520582199098, "step": 120 }, { "calibration/aurc": 0.43027029729660676, "calibration/batch_distribution_entropy": 0.9443037742377609, "calibration/buffer_distribution_entropy": 0.9749766621972892, "calibration/confidence_entropy": 0.420648326243084, "calibration/coverage@0%": 0.000392156862745098, "calibration/coverage@1%": 0.000392156862745098, "calibration/coverage@10%": 0.000392156862745098, "calibration/coverage@15%": 0.003126531862745098, "calibration/coverage@20%": 0.009376531862745098, "calibration/coverage@25%": 0.02657014732262768, "calibration/coverage@30%": 0.17616181446893825, "calibration/coverage@5%": 0.000392156862745098, "calibration/ece": 0.19883478146558858, "calibration/mean_confidence": 0.5291464915604904, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 1158.6, "completions/max_terminated_length": 692.4, "completions/mean_length": 190.79619140625, "completions/mean_terminated_length": 190.40262451171876, "completions/min_length": 91.0, "completions/min_terminated_length": 91.0, "epoch": 0.4, "grad_norm": 0.001098749809898436, "learning_rate": 1e-06, "loss": 0.0005, "num_tokens": 420107515.0, "reward": 0.9793721914291382, "reward_std": 0.09981218427419662, "rewards/accuracy_reward": 0.49814453125, "rewards/brier_reward": 0.7662390470504761, "rewards/confidence_uniqueness_reward": 0.9503534436225891, "rewards/format_reward": 0.999609375, "rewards/frontier_aurc_reward": -0.004101655632257462, "rewards/frontier_coverage_1": 0.13619700074195862, "rewards/frontier_coverage_10": 0.13619700074195862, "rewards/frontier_coverage_15": 0.13619700074195862, "rewards/frontier_coverage_20": 0.13619700074195862, "rewards/frontier_coverage_25": 0.1293622836470604, "rewards/frontier_coverage_5": 0.13619700074195862, "rewards/frontier_ece_reward": 0.01191479042172432, "signal/accuracy_reward/centered_abs_mean": 0.123504638671875, "signal/accuracy_reward/group_std_mean": 0.1655549794435501, "signal/accuracy_reward/group_zero_std_frac": 0.521875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0617523193359375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0617523193359375, "signal/advantage_abs_mean": 0.07545655816793442, "signal/advantage_pre_scale_abs_mean": 0.07545655816793442, "signal/advantage_pre_scale_std": 0.12451154887676238, "signal/advantage_std": 0.12451154887676238, "signal/brier_reward/centered_abs_mean": 0.16211409568786622, "signal/brier_reward/group_std_mean": 0.20733815133571626, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020264261960983278, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.020264261960983278, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02588532753288746, "signal/confidence_uniqueness_reward/group_std_mean": 0.034060098230838776, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0032356659416109324, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0032356659416109324, "signal/format_reward/centered_abs_mean": 0.0007568359375, "signal/format_reward/group_std_mean": 0.0022097086533904076, "signal/format_reward/group_zero_std_frac": 0.9875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00037841796875, "signal/frontier_aurc_reward/centered_abs_mean": 0.004065265553072095, "signal/frontier_aurc_reward/group_std_mean": 0.006544529832899571, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 7.276825199369341e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 7.276825199369341e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1825489729642868, "signal/frontier_coverage_1/group_std_mean": 0.2414218693971634, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0032676266506314277, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0032676266506314277, "signal/frontier_coverage_10/centered_abs_mean": 0.1825489729642868, "signal/frontier_coverage_10/group_std_mean": 0.2414218693971634, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0032676266506314277, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0032676266506314277, "signal/frontier_coverage_15/centered_abs_mean": 0.1825489729642868, "signal/frontier_coverage_15/group_std_mean": 0.2414218693971634, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0032676266506314277, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0032676266506314277, "signal/frontier_coverage_20/centered_abs_mean": 0.1825489729642868, "signal/frontier_coverage_20/group_std_mean": 0.2414218693971634, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0032676266506314277, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0032676266506314277, "signal/frontier_coverage_25/centered_abs_mean": 0.17083930373191833, "signal/frontier_coverage_25/group_std_mean": 0.226739364862442, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0030580234713852407, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0030580234713852407, "signal/frontier_coverage_5/centered_abs_mean": 0.1825489729642868, "signal/frontier_coverage_5/group_std_mean": 0.2414218693971634, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0032676266506314277, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0032676266506314277, "signal/frontier_ece_reward/centered_abs_mean": 0.017164209112524986, "signal/frontier_ece_reward/group_std_mean": 0.02124990485608578, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002145526139065623, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002145526139065623, "step": 125 }, { "calibration/aurc": 0.3307413637301698, "calibration/batch_distribution_entropy": 0.9539248294554381, "calibration/buffer_distribution_entropy": 0.974026721883375, "calibration/confidence_entropy": 0.445160747267321, "calibration/coverage@0%": 0.00078125, "calibration/coverage@1%": 0.00078125, "calibration/coverage@10%": 0.00078125, "calibration/coverage@15%": 0.0332665728962818, "calibration/coverage@20%": 0.14390670865949118, "calibration/coverage@25%": 0.2673984833659491, "calibration/coverage@30%": 0.40337114726027395, "calibration/coverage@5%": 0.00078125, "calibration/ece": 0.12327517119786222, "calibration/mean_confidence": 0.54675080181613, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 957.0, "completions/max_terminated_length": 581.0, "completions/mean_length": 193.95068359375, "completions/mean_terminated_length": 193.68844604492188, "completions/min_length": 90.6, "completions/min_terminated_length": 90.6, "epoch": 0.416, "grad_norm": 0.001056396751664579, "learning_rate": 1e-06, "loss": 0.0012, "num_tokens": 436974754.0, "reward": 0.9854490041732789, "reward_std": 0.09411467611789703, "rewards/accuracy_reward": 0.5017578125, "rewards/brier_reward": 0.7856782793998718, "rewards/confidence_uniqueness_reward": 0.9530214190483093, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.0033007480669766665, "rewards/frontier_coverage_1": 0.1507933869957924, "rewards/frontier_coverage_10": 0.1507933869957924, "rewards/frontier_coverage_15": 0.1507933869957924, "rewards/frontier_coverage_20": 0.1507933869957924, "rewards/frontier_coverage_25": 0.14029909968376159, "rewards/frontier_coverage_5": 0.1507933869957924, "rewards/frontier_ece_reward": 0.01144680418074131, "signal/accuracy_reward/centered_abs_mean": 0.12384033203125, "signal/accuracy_reward/group_std_mean": 0.15653499066829682, "signal/accuracy_reward/group_zero_std_frac": 0.578125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.061920166015625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.061920166015625, "signal/advantage_abs_mean": 0.07388749271631241, "signal/advantage_pre_scale_abs_mean": 0.07388749271631241, "signal/advantage_pre_scale_std": 0.12038870304822921, "signal/advantage_std": 0.12038870304822921, "signal/brier_reward/centered_abs_mean": 0.15659076273441314, "signal/brier_reward/group_std_mean": 0.19731901586055756, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019573845341801642, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.019573845341801642, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.023388362675905227, "signal/confidence_uniqueness_reward/group_std_mean": 0.031004397571086882, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0029235453344881534, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0029235453344881534, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.003098398260772228, "signal/frontier_aurc_reward/group_std_mean": 0.00491497041657567, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.546132597373798e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.546132597373798e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.19432482421398162, "signal/frontier_coverage_1/group_std_mean": 0.24642258882522583, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0034784142393618823, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0034784142393618823, "signal/frontier_coverage_10/centered_abs_mean": 0.19432482421398162, "signal/frontier_coverage_10/group_std_mean": 0.24642258882522583, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0034784142393618823, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0034784142393618823, "signal/frontier_coverage_15/centered_abs_mean": 0.19432482421398162, "signal/frontier_coverage_15/group_std_mean": 0.24642258882522583, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0034784142393618823, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0034784142393618823, "signal/frontier_coverage_20/centered_abs_mean": 0.19432482421398162, "signal/frontier_coverage_20/group_std_mean": 0.24642258882522583, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0034784142393618823, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0034784142393618823, "signal/frontier_coverage_25/centered_abs_mean": 0.17647163271903993, "signal/frontier_coverage_25/group_std_mean": 0.22427623569965363, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0031588422134518623, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0031588422134518623, "signal/frontier_coverage_5/centered_abs_mean": 0.19432482421398162, "signal/frontier_coverage_5/group_std_mean": 0.24642258882522583, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0034784142393618823, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0034784142393618823, "signal/frontier_ece_reward/centered_abs_mean": 0.014592299051582814, "signal/frontier_ece_reward/group_std_mean": 0.018116169050335883, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0018240373814478517, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0018240373814478517, "step": 130 }, { "calibration/aurc": 0.2824695747893299, "calibration/batch_distribution_entropy": 0.9428787987315607, "calibration/buffer_distribution_entropy": 0.9719925690177561, "calibration/confidence_entropy": 0.41514944308510227, "calibration/coverage@0%": 0.020703125, "calibration/coverage@1%": 0.020703125, "calibration/coverage@10%": 0.147265625, "calibration/coverage@15%": 0.242578125, "calibration/coverage@20%": 0.3546875, "calibration/coverage@25%": 0.405859375, "calibration/coverage@30%": 0.46328125, "calibration/coverage@5%": 0.028125, "calibration/ece": 0.11745400373456068, "calibration/mean_confidence": 0.5382869135028088, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 703.4, "completions/max_terminated_length": 544.8, "completions/mean_length": 194.23974609375, "completions/mean_terminated_length": 194.10873718261718, "completions/min_length": 92.4, "completions/min_terminated_length": 92.4, "epoch": 0.432, "grad_norm": 0.0009216758189722896, "learning_rate": 1e-06, "loss": 0.0008, "num_tokens": 453978105.0, "reward": 1.0048931002616883, "reward_std": 0.08277135789394378, "rewards/accuracy_reward": 0.53837890625, "rewards/brier_reward": 0.8011577129364014, "rewards/confidence_uniqueness_reward": 0.9518381714820862, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.0027131067123264073, "rewards/frontier_coverage_1": 0.1438295803964138, "rewards/frontier_coverage_10": 0.1438295803964138, "rewards/frontier_coverage_15": 0.1438295803964138, "rewards/frontier_coverage_20": 0.1438295803964138, "rewards/frontier_coverage_25": 0.1322506435215473, "rewards/frontier_coverage_5": 0.1438295803964138, "rewards/frontier_ece_reward": 0.01188302058726549, "signal/accuracy_reward/centered_abs_mean": 0.114117431640625, "signal/accuracy_reward/group_std_mean": 0.14671022891998292, "signal/accuracy_reward/group_zero_std_frac": 0.596875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0570587158203125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0570587158203125, "signal/advantage_abs_mean": 0.06361640393733978, "signal/advantage_pre_scale_abs_mean": 0.06361640393733978, "signal/advantage_pre_scale_std": 0.10817221403121949, "signal/advantage_std": 0.10817221403121949, "signal/brier_reward/centered_abs_mean": 0.13704997897148133, "signal/brier_reward/group_std_mean": 0.17600221931934357, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017131247371435166, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.017131247371435166, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.023459725454449652, "signal/confidence_uniqueness_reward/group_std_mean": 0.03010900169610977, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0029324656818062065, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0029324656818062065, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.002545620733872056, "signal/frontier_aurc_reward/group_std_mean": 0.004147910000756383, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.556660787784495e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.556660787784495e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1864376574754715, "signal/frontier_coverage_1/group_std_mean": 0.23926096856594087, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003337233932688832, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003337233932688832, "signal/frontier_coverage_10/centered_abs_mean": 0.1864376574754715, "signal/frontier_coverage_10/group_std_mean": 0.23926096856594087, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003337233932688832, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003337233932688832, "signal/frontier_coverage_15/centered_abs_mean": 0.1864376574754715, "signal/frontier_coverage_15/group_std_mean": 0.23926096856594087, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003337233932688832, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003337233932688832, "signal/frontier_coverage_20/centered_abs_mean": 0.1864376574754715, "signal/frontier_coverage_20/group_std_mean": 0.23926096856594087, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003337233932688832, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003337233932688832, "signal/frontier_coverage_25/centered_abs_mean": 0.16645156741142272, "signal/frontier_coverage_25/group_std_mean": 0.21410418748855592, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0029794828966259955, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0029794828966259955, "signal/frontier_coverage_5/centered_abs_mean": 0.1864376574754715, "signal/frontier_coverage_5/group_std_mean": 0.23926096856594087, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003337233932688832, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003337233932688832, "signal/frontier_ece_reward/centered_abs_mean": 0.01259520035237074, "signal/frontier_ece_reward/group_std_mean": 0.015745421312749384, "signal/frontier_ece_reward/group_zero_std_frac": 0.003125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0015744000440463424, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0015744000440463424, "step": 135 }, { "calibration/aurc": 0.29260948300243605, "calibration/batch_distribution_entropy": 0.9606903533224823, "calibration/buffer_distribution_entropy": 0.9695915320747955, "calibration/confidence_entropy": 0.4582926324034869, "calibration/coverage@0%": 0.02734375, "calibration/coverage@1%": 0.02734375, "calibration/coverage@10%": 0.063671875, "calibration/coverage@15%": 0.116015625, "calibration/coverage@20%": 0.23125, "calibration/coverage@25%": 0.373046875, "calibration/coverage@30%": 0.53828125, "calibration/coverage@5%": 0.036328125, "calibration/ece": 0.14489365937185286, "calibration/mean_confidence": 0.5384731583322886, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 584.8, "completions/max_terminated_length": 584.8, "completions/mean_length": 201.206640625, "completions/mean_terminated_length": 201.206640625, "completions/min_length": 78.6, "completions/min_terminated_length": 78.6, "epoch": 0.448, "grad_norm": 0.0008310034754686058, "learning_rate": 1e-06, "loss": -0.0004, "num_tokens": 470991261.0, "reward": 0.9908460736274719, "reward_std": 0.08178583383560181, "rewards/accuracy_reward": 0.5119140625, "rewards/brier_reward": 0.7910648584365845, "rewards/confidence_uniqueness_reward": 0.952523159980774, "rewards/format_reward": 0.99951171875, "rewards/frontier_aurc_reward": -0.002787482738494873, "rewards/frontier_coverage_1": 0.15150942653417587, "rewards/frontier_coverage_10": 0.15150942653417587, "rewards/frontier_coverage_15": 0.15150942653417587, "rewards/frontier_coverage_20": 0.15150942653417587, "rewards/frontier_coverage_25": 0.13818347454071045, "rewards/frontier_coverage_5": 0.15150942653417587, "rewards/frontier_ece_reward": 0.00960810985416174, "signal/accuracy_reward/centered_abs_mean": 0.108056640625, "signal/accuracy_reward/group_std_mean": 0.14055088460445403, "signal/accuracy_reward/group_zero_std_frac": 0.603125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0540283203125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0540283203125, "signal/advantage_abs_mean": 0.0633295938372612, "signal/advantage_pre_scale_abs_mean": 0.0633295938372612, "signal/advantage_pre_scale_std": 0.1058726117014885, "signal/advantage_std": 0.1058726117014885, "signal/brier_reward/centered_abs_mean": 0.14405288696289062, "signal/brier_reward/group_std_mean": 0.18299511671066285, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018006610870361327, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.018006610870361327, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.022896628081798553, "signal/confidence_uniqueness_reward/group_std_mean": 0.029164545238018036, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002862078510224819, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002862078510224819, "signal/format_reward/centered_abs_mean": 0.000823974609375, "signal/format_reward/group_std_mean": 0.0011528188362717629, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004119873046875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0004119873046875, "signal/frontier_aurc_reward/centered_abs_mean": 0.002245605061762035, "signal/frontier_aurc_reward/group_std_mean": 0.00356750157661736, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.0196329064201566e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.0196329064201566e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1972437471151352, "signal/frontier_coverage_1/group_std_mean": 0.2506330370903015, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003530662879347801, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003530662879347801, "signal/frontier_coverage_10/centered_abs_mean": 0.1972437471151352, "signal/frontier_coverage_10/group_std_mean": 0.2506330370903015, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003530662879347801, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003530662879347801, "signal/frontier_coverage_15/centered_abs_mean": 0.1972437471151352, "signal/frontier_coverage_15/group_std_mean": 0.2506330370903015, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003530662879347801, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003530662879347801, "signal/frontier_coverage_20/centered_abs_mean": 0.1972437471151352, "signal/frontier_coverage_20/group_std_mean": 0.2506330370903015, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003530662879347801, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003530662879347801, "signal/frontier_coverage_25/centered_abs_mean": 0.1650971680879593, "signal/frontier_coverage_25/group_std_mean": 0.21136927902698516, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0029552392661571503, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0029552392661571503, "signal/frontier_coverage_5/centered_abs_mean": 0.1972437471151352, "signal/frontier_coverage_5/group_std_mean": 0.2506330370903015, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003530662879347801, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003530662879347801, "signal/frontier_ece_reward/centered_abs_mean": 0.012216190062463283, "signal/frontier_ece_reward/group_std_mean": 0.015183654241263866, "signal/frontier_ece_reward/group_zero_std_frac": 0.003125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0015270237578079104, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0015270237578079104, "step": 140 }, { "calibration/aurc": 0.4077113827188322, "calibration/batch_distribution_entropy": 0.9615084032212978, "calibration/buffer_distribution_entropy": 0.9685401991489083, "calibration/confidence_entropy": 0.45527402259160316, "calibration/coverage@0%": 0.003515625, "calibration/coverage@1%": 0.003515625, "calibration/coverage@10%": 0.009765625, "calibration/coverage@15%": 0.04296875, "calibration/coverage@20%": 0.071484375, "calibration/coverage@25%": 0.191796875, "calibration/coverage@30%": 0.28515625, "calibration/coverage@5%": 0.003515625, "calibration/ece": 0.1355396044993245, "calibration/mean_confidence": 0.45190300263963845, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 666.2, "completions/max_terminated_length": 450.4, "completions/mean_length": 208.301953125, "completions/mean_terminated_length": 208.17261352539063, "completions/min_length": 96.6, "completions/min_terminated_length": 96.6, "epoch": 0.464, "grad_norm": 0.0008374059689231217, "learning_rate": 1e-06, "loss": -0.0, "num_tokens": 488295089.0, "reward": 0.9605985045433044, "reward_std": 0.07935848534107208, "rewards/accuracy_reward": 0.45380859375, "rewards/brier_reward": 0.7694530367851258, "rewards/confidence_uniqueness_reward": 0.9470274209976196, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.00324577521532774, "rewards/frontier_coverage_1": 0.17469169050455094, "rewards/frontier_coverage_10": 0.17469169050455094, "rewards/frontier_coverage_15": 0.17469169050455094, "rewards/frontier_coverage_20": 0.17469169050455094, "rewards/frontier_coverage_25": 0.1511695146560669, "rewards/frontier_coverage_5": 0.17469169050455094, "rewards/frontier_ece_reward": 0.007592650689184665, "signal/accuracy_reward/centered_abs_mean": 0.096746826171875, "signal/accuracy_reward/group_std_mean": 0.130436909198761, "signal/accuracy_reward/group_zero_std_frac": 0.61875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0483734130859375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0483734130859375, "signal/advantage_abs_mean": 0.06013599261641502, "signal/advantage_pre_scale_abs_mean": 0.06013599261641502, "signal/advantage_pre_scale_std": 0.10274101942777633, "signal/advantage_std": 0.10274101942777633, "signal/brier_reward/centered_abs_mean": 0.1392355114221573, "signal/brier_reward/group_std_mean": 0.17765427827835084, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01740443892776966, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01740443892776966, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.025603653863072395, "signal/confidence_uniqueness_reward/group_std_mean": 0.03302198946475983, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0032004567328840494, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0032004567328840494, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0022237420780584216, "signal/frontier_aurc_reward/group_std_mean": 0.003647429635748267, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.980498222517781e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.980498222517781e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1835351675748825, "signal/frontier_coverage_1/group_std_mean": 0.23672112226486205, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003285279218107462, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003285279218107462, "signal/frontier_coverage_10/centered_abs_mean": 0.1835351675748825, "signal/frontier_coverage_10/group_std_mean": 0.23672112226486205, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003285279218107462, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003285279218107462, "signal/frontier_coverage_15/centered_abs_mean": 0.1835351675748825, "signal/frontier_coverage_15/group_std_mean": 0.23672112226486205, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003285279218107462, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003285279218107462, "signal/frontier_coverage_20/centered_abs_mean": 0.1835351675748825, "signal/frontier_coverage_20/group_std_mean": 0.23672112226486205, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003285279218107462, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003285279218107462, "signal/frontier_coverage_25/centered_abs_mean": 0.15318235754966736, "signal/frontier_coverage_25/group_std_mean": 0.1978419154882431, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002741964068263769, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002741964068263769, "signal/frontier_coverage_5/centered_abs_mean": 0.1835351675748825, "signal/frontier_coverage_5/group_std_mean": 0.23672112226486205, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003285279218107462, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003285279218107462, "signal/frontier_ece_reward/centered_abs_mean": 0.010807633772492409, "signal/frontier_ece_reward/group_std_mean": 0.013707993924617768, "signal/frontier_ece_reward/group_zero_std_frac": 0.0125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0013509542215615511, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0013509542215615511, "step": 145 }, { "calibration/aurc": 0.30240157627696823, "calibration/batch_distribution_entropy": 0.9399514031439473, "calibration/buffer_distribution_entropy": 0.9665852737948889, "calibration/confidence_entropy": 0.422597238136302, "calibration/coverage@0%": 0.00625, "calibration/coverage@1%": 0.00625, "calibration/coverage@10%": 0.0546875, "calibration/coverage@15%": 0.19296875, "calibration/coverage@20%": 0.2640625, "calibration/coverage@25%": 0.416796875, "calibration/coverage@30%": 0.47890625, "calibration/coverage@5%": 0.017578125, "calibration/ece": 0.16375171296404, "calibration/mean_confidence": 0.46227563078596, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 959.2, "completions/max_terminated_length": 560.2, "completions/mean_length": 210.96083984375, "completions/mean_terminated_length": 210.70225830078124, "completions/min_length": 100.8, "completions/min_terminated_length": 100.8, "epoch": 0.48, "grad_norm": 0.000989911612123251, "learning_rate": 1e-06, "loss": 0.0007, "num_tokens": 505503360.0, "reward": 0.987113094329834, "reward_std": 0.08181465268135071, "rewards/accuracy_reward": 0.511328125, "rewards/brier_reward": 0.7769276857376098, "rewards/confidence_uniqueness_reward": 0.9411561250686645, "rewards/format_reward": 0.999609375, "rewards/frontier_aurc_reward": -0.0026606434723362325, "rewards/frontier_coverage_1": 0.14955383241176606, "rewards/frontier_coverage_10": 0.14955383241176606, "rewards/frontier_coverage_15": 0.14955383241176606, "rewards/frontier_coverage_20": 0.14955383241176606, "rewards/frontier_coverage_25": 0.1367091566324234, "rewards/frontier_coverage_5": 0.14955383241176606, "rewards/frontier_ece_reward": 0.008794736303389072, "signal/accuracy_reward/centered_abs_mean": 0.12056884765625, "signal/accuracy_reward/group_std_mean": 0.1548303782939911, "signal/accuracy_reward/group_zero_std_frac": 0.56875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.060284423828125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.060284423828125, "signal/advantage_abs_mean": 0.06270648390054703, "signal/advantage_pre_scale_abs_mean": 0.06270648390054703, "signal/advantage_pre_scale_std": 0.10368855893611909, "signal/advantage_std": 0.10368855893611909, "signal/brier_reward/centered_abs_mean": 0.14834778904914855, "signal/brier_reward/group_std_mean": 0.18951214253902435, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01854347363114357, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01854347363114357, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.029853297770023345, "signal/confidence_uniqueness_reward/group_std_mean": 0.03863080143928528, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003731662221252918, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003731662221252918, "signal/format_reward/centered_abs_mean": 0.0007568359375, "signal/format_reward/group_std_mean": 0.0022097086533904076, "signal/format_reward/group_zero_std_frac": 0.9875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00037841796875, "signal/frontier_aurc_reward/centered_abs_mean": 0.0020392842590808867, "signal/frontier_aurc_reward/group_std_mean": 0.003261732868850231, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.65031861292664e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.65031861292664e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.21441528499126433, "signal/frontier_coverage_1/group_std_mean": 0.2732947587966919, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00383803341537714, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00383803341537714, "signal/frontier_coverage_10/centered_abs_mean": 0.21441528499126433, "signal/frontier_coverage_10/group_std_mean": 0.2732947587966919, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00383803341537714, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00383803341537714, "signal/frontier_coverage_15/centered_abs_mean": 0.21441528499126433, "signal/frontier_coverage_15/group_std_mean": 0.2732947587966919, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00383803341537714, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00383803341537714, "signal/frontier_coverage_20/centered_abs_mean": 0.21441528499126433, "signal/frontier_coverage_20/group_std_mean": 0.2732947587966919, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00383803341537714, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00383803341537714, "signal/frontier_coverage_25/centered_abs_mean": 0.18751142024993897, "signal/frontier_coverage_25/group_std_mean": 0.2398875504732132, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0033564542420208452, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0033564542420208452, "signal/frontier_coverage_5/centered_abs_mean": 0.21441528499126433, "signal/frontier_coverage_5/group_std_mean": 0.2732947587966919, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00383803341537714, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00383803341537714, "signal/frontier_ece_reward/centered_abs_mean": 0.011400581523776054, "signal/frontier_ece_reward/group_std_mean": 0.014213207736611366, "signal/frontier_ece_reward/group_zero_std_frac": 0.00625, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0014250726904720067, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0014250726904720067, "step": 150 }, { "epoch": 0.48, "eval_calibration/aurc": 0.5242910077871237, "eval_calibration/batch_distribution_entropy": 0.8875695608329197, "eval_calibration/buffer_distribution_entropy": 0.9648809725349352, "eval_calibration/confidence_entropy": 0.42631164594353543, "eval_calibration/coverage@0%": 0.0390625, "eval_calibration/coverage@1%": 0.0390625, "eval_calibration/coverage@10%": 0.0390625, "eval_calibration/coverage@15%": 0.0390625, "eval_calibration/coverage@20%": 0.1171875, "eval_calibration/coverage@25%": 0.1328125, "eval_calibration/coverage@30%": 0.2265625, "eval_calibration/coverage@5%": 0.0390625, "eval_calibration/ece": 0.211484375, "eval_calibration/mean_confidence": 0.40789062499999995, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 373.25, "eval_completions/max_terminated_length": 373.25, "eval_completions/mean_length": 217.67988967895508, "eval_completions/mean_terminated_length": 217.67988967895508, "eval_completions/min_length": 114.5, "eval_completions/min_terminated_length": 114.5, "eval_loss": 0.0, "eval_num_tokens": 505503360.0, "eval_reward": 0.947238028049469, "eval_reward_std": 0.21095874905586243, "eval_rewards/accuracy_reward": 0.416015625, "eval_rewards/brier_reward": 0.8082947880029678, "eval_rewards/confidence_uniqueness_reward": 0.890625, "eval_rewards/format_reward": 1.0, "eval_rewards/frontier_aurc_reward": -0.002717375347856432, "eval_rewards/frontier_coverage_1": 0.2446550689637661, "eval_rewards/frontier_coverage_10": 0.2446550689637661, "eval_rewards/frontier_coverage_15": 0.2446550689637661, "eval_rewards/frontier_coverage_20": 0.2446550689637661, "eval_rewards/frontier_coverage_25": 0.20336830243468285, "eval_rewards/frontier_coverage_5": 0.2446550689637661, "eval_rewards/frontier_ece_reward": 0.011015785159543157, "eval_runtime": 20.0514, "eval_samples_per_second": 24.936, "eval_signal/accuracy_reward/centered_abs_mean": 0.4644775390625, "eval_signal/accuracy_reward/group_std_mean": 0.4889160767197609, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23223876953125, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23223876953125, "eval_signal/advantage_abs_mean": 0.19064204022288322, "eval_signal/advantage_pre_scale_abs_mean": 0.19064204022288322, "eval_signal/advantage_pre_scale_std": 0.20879125222563744, "eval_signal/advantage_std": 0.20879125222563744, "eval_signal/brier_reward/centered_abs_mean": 0.20391716808080673, "eval_signal/brier_reward/group_std_mean": 0.26033516973257065, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02548964601010084, "eval_signal/brier_reward/weight": 0.125, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.02548964601010084, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0490570068359375, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.0586219010874629, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0061321258544921875, "eval_signal/confidence_uniqueness_reward/weight": 0.125, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0061321258544921875, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0030126574565656483, "eval_signal/frontier_aurc_reward/group_std_mean": 0.005966346827335656, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.3926567488815635e-05, "eval_signal/frontier_aurc_reward/weight": 0.017899999395012856, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.3926567488815635e-05, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.42714041471481323, "eval_signal/frontier_coverage_1/group_std_mean": 0.508579321205616, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00764581304974854, "eval_signal/frontier_coverage_1/weight": 0.017899999395012856, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00764581304974854, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.42714041471481323, "eval_signal/frontier_coverage_10/group_std_mean": 0.508579321205616, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00764581304974854, "eval_signal/frontier_coverage_10/weight": 0.017899999395012856, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00764581304974854, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.42714041471481323, "eval_signal/frontier_coverage_15/group_std_mean": 0.508579321205616, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00764581304974854, "eval_signal/frontier_coverage_15/weight": 0.017899999395012856, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00764581304974854, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.42714041471481323, "eval_signal/frontier_coverage_20/group_std_mean": 0.508579321205616, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00764581304974854, "eval_signal/frontier_coverage_20/weight": 0.017899999395012856, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00764581304974854, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.35514652729034424, "eval_signal/frontier_coverage_25/group_std_mean": 0.4233100786805153, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.006357122561894357, "eval_signal/frontier_coverage_25/weight": 0.017899999395012856, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.006357122561894357, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.42714041471481323, "eval_signal/frontier_coverage_5/group_std_mean": 0.508579321205616, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00764581304974854, "eval_signal/frontier_coverage_5/weight": 0.017899999395012856, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00764581304974854, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.015712440479546785, "eval_signal/frontier_ece_reward/group_std_mean": 0.01919988915324211, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001964055059943348, "eval_signal/frontier_ece_reward/weight": 0.125, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001964055059943348, "eval_steps_per_second": 0.199, "step": 150 }, { "calibration/aurc": 0.3840361950058275, "calibration/batch_distribution_entropy": 0.9363928805886245, "calibration/buffer_distribution_entropy": 0.96374512597476, "calibration/confidence_entropy": 0.41662806096978233, "calibration/coverage@0%": 0.011328125, "calibration/coverage@1%": 0.011328125, "calibration/coverage@10%": 0.128515625, "calibration/coverage@15%": 0.150390625, "calibration/coverage@20%": 0.1984375, "calibration/coverage@25%": 0.233203125, "calibration/coverage@30%": 0.2875, "calibration/coverage@5%": 0.078515625, "calibration/ece": 0.14795585937499997, "calibration/mean_confidence": 0.48408632812499997, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 490.8, "completions/max_terminated_length": 490.8, "completions/mean_length": 216.8044921875, "completions/mean_terminated_length": 216.8044921875, "completions/min_length": 98.4, "completions/min_terminated_length": 98.4, "epoch": 0.496, "grad_norm": 0.0007564805564470589, "learning_rate": 1e-06, "loss": 0.0006, "num_tokens": 523031278.0, "reward": 1.000865638256073, "reward_std": 0.08007181584835052, "rewards/accuracy_reward": 0.54189453125, "rewards/brier_reward": 0.7783055067062378, "rewards/confidence_uniqueness_reward": 0.9507560729980469, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.0027518115239217877, "rewards/frontier_coverage_1": 0.12289173305034637, "rewards/frontier_coverage_10": 0.12289173305034637, "rewards/frontier_coverage_15": 0.12289173305034637, "rewards/frontier_coverage_20": 0.12289173305034637, "rewards/frontier_coverage_25": 0.09934463798999786, "rewards/frontier_coverage_5": 0.12289173305034637, "rewards/frontier_ece_reward": 0.00846287291496992, "signal/accuracy_reward/centered_abs_mean": 0.101702880859375, "signal/accuracy_reward/group_std_mean": 0.1375451058149338, "signal/accuracy_reward/group_zero_std_frac": 0.590625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0508514404296875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0508514404296875, "signal/advantage_abs_mean": 0.060508400201797485, "signal/advantage_pre_scale_abs_mean": 0.060508400201797485, "signal/advantage_pre_scale_std": 0.10368632078170777, "signal/advantage_std": 0.10368632078170777, "signal/brier_reward/centered_abs_mean": 0.1387119174003601, "signal/brier_reward/group_std_mean": 0.17947129905223846, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017338989675045012, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.017338989675045012, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.023092246055603026, "signal/confidence_uniqueness_reward/group_std_mean": 0.02924296595156193, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0028865307569503782, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0028865307569503782, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.002348742028698325, "signal/frontier_aurc_reward/group_std_mean": 0.003962589660659432, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.204248070891481e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.204248070891481e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.17886387705802917, "signal/frontier_coverage_1/group_std_mean": 0.23453721702098845, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0032016633544117213, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0032016633544117213, "signal/frontier_coverage_10/centered_abs_mean": 0.17886387705802917, "signal/frontier_coverage_10/group_std_mean": 0.23453721702098845, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0032016633544117213, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0032016633544117213, "signal/frontier_coverage_15/centered_abs_mean": 0.17886387705802917, "signal/frontier_coverage_15/group_std_mean": 0.23453721702098845, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0032016633544117213, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0032016633544117213, "signal/frontier_coverage_20/centered_abs_mean": 0.17886387705802917, "signal/frontier_coverage_20/group_std_mean": 0.23453721702098845, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0032016633544117213, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0032016633544117213, "signal/frontier_coverage_25/centered_abs_mean": 0.14604896903038025, "signal/frontier_coverage_25/group_std_mean": 0.19246629774570465, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0026142764370888473, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0026142764370888473, "signal/frontier_coverage_5/centered_abs_mean": 0.17886387705802917, "signal/frontier_coverage_5/group_std_mean": 0.23453721702098845, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0032016633544117213, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0032016633544117213, "signal/frontier_ece_reward/centered_abs_mean": 0.010641206428408623, "signal/frontier_ece_reward/group_std_mean": 0.013527031242847442, "signal/frontier_ece_reward/group_zero_std_frac": 0.00625, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0013301508035510779, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0013301508035510779, "step": 155 }, { "calibration/aurc": 0.31820365277463686, "calibration/batch_distribution_entropy": 0.955862569184351, "calibration/buffer_distribution_entropy": 0.9614048048735244, "calibration/confidence_entropy": 0.4472530108210627, "calibration/coverage@0%": 0.017578125, "calibration/coverage@1%": 0.017578125, "calibration/coverage@10%": 0.187890625, "calibration/coverage@15%": 0.29140625, "calibration/coverage@20%": 0.357421875, "calibration/coverage@25%": 0.424609375, "calibration/coverage@30%": 0.491015625, "calibration/coverage@5%": 0.09296875, "calibration/ece": 0.14949122731587602, "calibration/mean_confidence": 0.5130281283575426, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 618.8, "completions/max_terminated_length": 618.8, "completions/mean_length": 213.851953125, "completions/mean_terminated_length": 213.851953125, "completions/min_length": 102.4, "completions/min_terminated_length": 102.4, "epoch": 0.512, "grad_norm": 0.0008687236113473773, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 540366786.0, "reward": 1.0033927321434022, "reward_std": 0.08281527161598205, "rewards/accuracy_reward": 0.538671875, "rewards/brier_reward": 0.7973424553871155, "rewards/confidence_uniqueness_reward": 0.9516700744628906, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.002556700585409999, "rewards/frontier_coverage_1": 0.13710992485284806, "rewards/frontier_coverage_10": 0.13710992485284806, "rewards/frontier_coverage_15": 0.13710992485284806, "rewards/frontier_coverage_20": 0.13710992485284806, "rewards/frontier_coverage_25": 0.10893923193216323, "rewards/frontier_coverage_5": 0.13710992485284806, "rewards/frontier_ece_reward": 0.01003704108297825, "signal/accuracy_reward/centered_abs_mean": 0.10458984375, "signal/accuracy_reward/group_std_mean": 0.1432061731815338, "signal/accuracy_reward/group_zero_std_frac": 0.575, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.052294921875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.052294921875, "signal/advantage_abs_mean": 0.06263713538646698, "signal/advantage_pre_scale_abs_mean": 0.06263713538646698, "signal/advantage_pre_scale_std": 0.10937785655260086, "signal/advantage_std": 0.10937785655260086, "signal/brier_reward/centered_abs_mean": 0.1311786264181137, "signal/brier_reward/group_std_mean": 0.16993843913078308, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016397328302264213, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.016397328302264213, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.023091053962707518, "signal/confidence_uniqueness_reward/group_std_mean": 0.029020922258496284, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0028863817453384398, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0028863817453384398, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0022825901862233875, "signal/frontier_aurc_reward/group_std_mean": 0.0037255555856972934, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.085836226295214e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.085836226295214e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1627916783094406, "signal/frontier_coverage_1/group_std_mean": 0.21614238023757934, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002913971059024334, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002913971059024334, "signal/frontier_coverage_10/centered_abs_mean": 0.1627916783094406, "signal/frontier_coverage_10/group_std_mean": 0.21614238023757934, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002913971059024334, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002913971059024334, "signal/frontier_coverage_15/centered_abs_mean": 0.1627916783094406, "signal/frontier_coverage_15/group_std_mean": 0.21614238023757934, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002913971059024334, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002913971059024334, "signal/frontier_coverage_20/centered_abs_mean": 0.1627916783094406, "signal/frontier_coverage_20/group_std_mean": 0.21614238023757934, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002913971059024334, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002913971059024334, "signal/frontier_coverage_25/centered_abs_mean": 0.1238186538219452, "signal/frontier_coverage_25/group_std_mean": 0.16544671654701232, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0022163538727909327, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022163538727909327, "signal/frontier_coverage_5/centered_abs_mean": 0.1627916783094406, "signal/frontier_coverage_5/group_std_mean": 0.21614238023757934, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002913971059024334, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002913971059024334, "signal/frontier_ece_reward/centered_abs_mean": 0.010741004720330238, "signal/frontier_ece_reward/group_std_mean": 0.013525258377194404, "signal/frontier_ece_reward/group_zero_std_frac": 0.003125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0013426255900412798, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0013426255900412798, "step": 160 }, { "calibration/aurc": 0.20637803832182802, "calibration/batch_distribution_entropy": 0.9363854519581405, "calibration/buffer_distribution_entropy": 0.9601961260988621, "calibration/confidence_entropy": 0.41719762592603954, "calibration/coverage@0%": 0.05123685176125244, "calibration/coverage@1%": 0.06728381849315068, "calibration/coverage@10%": 0.3201450892857143, "calibration/coverage@15%": 0.3979008683953033, "calibration/coverage@20%": 0.4795690129647749, "calibration/coverage@25%": 0.6295980613992171, "calibration/coverage@30%": 0.7257208598336595, "calibration/coverage@5%": 0.18101608365949118, "calibration/ece": 0.10404099756635832, "calibration/mean_confidence": 0.5286149885154776, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 956.0, "completions/max_terminated_length": 573.8, "completions/mean_length": 215.037890625, "completions/mean_terminated_length": 214.7794982910156, "completions/min_length": 97.0, "completions/min_terminated_length": 97.0, "epoch": 0.528, "grad_norm": 0.0008408619905821979, "learning_rate": 1e-06, "loss": 0.0011, "num_tokens": 557598310.0, "reward": 1.003028893470764, "reward_std": 0.08370372354984283, "rewards/accuracy_reward": 0.53251953125, "rewards/brier_reward": 0.8059515237808228, "rewards/confidence_uniqueness_reward": 0.950570797920227, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.0023401447338983416, "rewards/frontier_coverage_1": 0.15629091411828994, "rewards/frontier_coverage_10": 0.15629091411828994, "rewards/frontier_coverage_15": 0.15629091411828994, "rewards/frontier_coverage_20": 0.15629091411828994, "rewards/frontier_coverage_25": 0.11881576627492904, "rewards/frontier_coverage_5": 0.15629091411828994, "rewards/frontier_ece_reward": 0.010219059139490127, "signal/accuracy_reward/centered_abs_mean": 0.117535400390625, "signal/accuracy_reward/group_std_mean": 0.14876840710639955, "signal/accuracy_reward/group_zero_std_frac": 0.596875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0587677001953125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0587677001953125, "signal/advantage_abs_mean": 0.06532718688249588, "signal/advantage_pre_scale_abs_mean": 0.06532718688249588, "signal/advantage_pre_scale_std": 0.11273082941770554, "signal/advantage_std": 0.11273082941770554, "signal/brier_reward/centered_abs_mean": 0.12941071391105652, "signal/brier_reward/group_std_mean": 0.16719320714473723, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016176339238882065, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.016176339238882065, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02397966869175434, "signal/confidence_uniqueness_reward/group_std_mean": 0.03146580345928669, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0029974585864692926, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0029974585864692926, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.002146564261056483, "signal/frontier_aurc_reward/group_std_mean": 0.0033703493420034645, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.842349833576009e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.842349833576009e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1766491413116455, "signal/frontier_coverage_1/group_std_mean": 0.22845688462257385, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003162019606679678, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003162019606679678, "signal/frontier_coverage_10/centered_abs_mean": 0.1766491413116455, "signal/frontier_coverage_10/group_std_mean": 0.22845688462257385, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003162019606679678, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003162019606679678, "signal/frontier_coverage_15/centered_abs_mean": 0.1766491413116455, "signal/frontier_coverage_15/group_std_mean": 0.22845688462257385, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003162019606679678, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003162019606679678, "signal/frontier_coverage_20/centered_abs_mean": 0.1766491413116455, "signal/frontier_coverage_20/group_std_mean": 0.22845688462257385, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003162019606679678, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003162019606679678, "signal/frontier_coverage_25/centered_abs_mean": 0.1280095487833023, "signal/frontier_coverage_25/group_std_mean": 0.16677605509757995, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0022913708817213774, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022913708817213774, "signal/frontier_coverage_5/centered_abs_mean": 0.1766491413116455, "signal/frontier_coverage_5/group_std_mean": 0.22845688462257385, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003162019606679678, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003162019606679678, "signal/frontier_ece_reward/centered_abs_mean": 0.010062118992209435, "signal/frontier_ece_reward/group_std_mean": 0.01266515776515007, "signal/frontier_ece_reward/group_zero_std_frac": 0.0125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0012577648740261793, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0012577648740261793, "step": 165 }, { "calibration/aurc": 0.22990942715224433, "calibration/batch_distribution_entropy": 0.9086932295556744, "calibration/buffer_distribution_entropy": 0.9580016627959249, "calibration/confidence_entropy": 0.3994817622971429, "calibration/coverage@0%": 0.0140625, "calibration/coverage@1%": 0.0140625, "calibration/coverage@10%": 0.201953125, "calibration/coverage@15%": 0.3609375, "calibration/coverage@20%": 0.490625, "calibration/coverage@25%": 0.58984375, "calibration/coverage@30%": 0.70625, "calibration/coverage@5%": 0.094921875, "calibration/ece": 0.08248207063576382, "calibration/mean_confidence": 0.5646070706357639, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 532.0, "completions/max_terminated_length": 532.0, "completions/mean_length": 219.7181640625, "completions/mean_terminated_length": 219.7181640625, "completions/min_length": 101.4, "completions/min_terminated_length": 101.4, "epoch": 0.544, "grad_norm": 0.000887208734638989, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 575011808.0, "reward": 1.010369873046875, "reward_std": 0.0878099650144577, "rewards/accuracy_reward": 0.56220703125, "rewards/brier_reward": 0.7859106302261353, "rewards/confidence_uniqueness_reward": 0.9532241821289062, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.002733378019183874, "rewards/frontier_coverage_1": 0.10622020661830903, "rewards/frontier_coverage_10": 0.10622020661830903, "rewards/frontier_coverage_15": 0.10622020661830903, "rewards/frontier_coverage_20": 0.10622020661830903, "rewards/frontier_coverage_25": 0.07521467357873916, "rewards/frontier_coverage_5": 0.10622020661830903, "rewards/frontier_ece_reward": 0.008562875911593437, "signal/accuracy_reward/centered_abs_mean": 0.122222900390625, "signal/accuracy_reward/group_std_mean": 0.16073189973831176, "signal/accuracy_reward/group_zero_std_frac": 0.54375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0611114501953125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0611114501953125, "signal/advantage_abs_mean": 0.06658549755811691, "signal/advantage_pre_scale_abs_mean": 0.06658549755811691, "signal/advantage_pre_scale_std": 0.11341958940029144, "signal/advantage_std": 0.11341958940029144, "signal/brier_reward/centered_abs_mean": 0.13747948110103608, "signal/brier_reward/group_std_mean": 0.17746234834194183, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01718493513762951, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01718493513762951, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02245485782623291, "signal/confidence_uniqueness_reward/group_std_mean": 0.028442315012216567, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0028068572282791138, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0028068572282791138, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.002469687769189477, "signal/frontier_aurc_reward/group_std_mean": 0.0038645863067358734, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.420740733621642e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.420740733621642e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.17635051608085633, "signal/frontier_coverage_1/group_std_mean": 0.23008197844028472, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0031566740944981575, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0031566740944981575, "signal/frontier_coverage_10/centered_abs_mean": 0.17635051608085633, "signal/frontier_coverage_10/group_std_mean": 0.23008197844028472, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0031566740944981575, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0031566740944981575, "signal/frontier_coverage_15/centered_abs_mean": 0.17635051608085633, "signal/frontier_coverage_15/group_std_mean": 0.23008197844028472, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0031566740944981575, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0031566740944981575, "signal/frontier_coverage_20/centered_abs_mean": 0.17635051608085633, "signal/frontier_coverage_20/group_std_mean": 0.23008197844028472, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0031566740944981575, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0031566740944981575, "signal/frontier_coverage_25/centered_abs_mean": 0.11544786989688874, "signal/frontier_coverage_25/group_std_mean": 0.15155554413795472, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0020665168296545742, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020665168296545742, "signal/frontier_coverage_5/centered_abs_mean": 0.17635051608085633, "signal/frontier_coverage_5/group_std_mean": 0.23008197844028472, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0031566740944981575, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0031566740944981575, "signal/frontier_ece_reward/centered_abs_mean": 0.010545129328966141, "signal/frontier_ece_reward/group_std_mean": 0.013178185001015663, "signal/frontier_ece_reward/group_zero_std_frac": 0.003125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0013181411661207676, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0013181411661207676, "step": 170 }, { "calibration/aurc": 0.25756737417173714, "calibration/batch_distribution_entropy": 0.9384644007235714, "calibration/buffer_distribution_entropy": 0.956561369285882, "calibration/confidence_entropy": 0.41747994135409827, "calibration/coverage@0%": 0.06017612524461839, "calibration/coverage@1%": 0.1062698752446184, "calibration/coverage@10%": 0.2594674963307241, "calibration/coverage@15%": 0.35207696306262226, "calibration/coverage@20%": 0.3985942086594912, "calibration/coverage@25%": 0.47088353106653624, "calibration/coverage@30%": 0.6209072284735813, "calibration/coverage@5%": 0.2000535102739726, "calibration/ece": 0.119488073066949, "calibration/mean_confidence": 0.5228720969428358, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 1030.0, "completions/max_terminated_length": 619.8, "completions/mean_length": 221.8912109375, "completions/mean_terminated_length": 221.6349365234375, "completions/min_length": 101.4, "completions/min_terminated_length": 101.4, "epoch": 0.56, "grad_norm": 0.0007872599526308477, "learning_rate": 1e-06, "loss": 0.001, "num_tokens": 592105382.0, "reward": 0.9981863141059876, "reward_std": 0.07935396581888199, "rewards/accuracy_reward": 0.52373046875, "rewards/brier_reward": 0.8048706293106079, "rewards/confidence_uniqueness_reward": 0.9511743545532226, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.0027451789472252132, "rewards/frontier_coverage_1": 0.1567632645368576, "rewards/frontier_coverage_10": 0.1567632645368576, "rewards/frontier_coverage_15": 0.1567632645368576, "rewards/frontier_coverage_20": 0.1529286891222, "rewards/frontier_coverage_25": 0.10326671600341797, "rewards/frontier_coverage_5": 0.1567632645368576, "rewards/frontier_ece_reward": 0.009607139974832535, "signal/accuracy_reward/centered_abs_mean": 0.091827392578125, "signal/accuracy_reward/group_std_mean": 0.12912326753139497, "signal/accuracy_reward/group_zero_std_frac": 0.603125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0459136962890625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0459136962890625, "signal/advantage_abs_mean": 0.05798099935054779, "signal/advantage_pre_scale_abs_mean": 0.05798099935054779, "signal/advantage_pre_scale_std": 0.10438774973154068, "signal/advantage_std": 0.10438774973154068, "signal/brier_reward/centered_abs_mean": 0.13308307528495789, "signal/brier_reward/group_std_mean": 0.17416214644908906, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016635384410619736, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.016635384410619736, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.024077710509300233, "signal/confidence_uniqueness_reward/group_std_mean": 0.03139141947031021, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003009713813662529, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003009713813662529, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.002499508252367377, "signal/frontier_aurc_reward/group_std_mean": 0.004307471588253975, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.4741196325048806e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.4741196325048806e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.16583755910396575, "signal/frontier_coverage_1/group_std_mean": 0.21883132457733154, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0029684923123568297, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0029684923123568297, "signal/frontier_coverage_10/centered_abs_mean": 0.16583755910396575, "signal/frontier_coverage_10/group_std_mean": 0.21883132457733154, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0029684923123568297, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0029684923123568297, "signal/frontier_coverage_15/centered_abs_mean": 0.16583755910396575, "signal/frontier_coverage_15/group_std_mean": 0.21883132457733154, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0029684923123568297, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0029684923123568297, "signal/frontier_coverage_20/centered_abs_mean": 0.16073502898216246, "signal/frontier_coverage_20/group_std_mean": 0.2121128112077713, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0028771569021046163, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0028771569021046163, "signal/frontier_coverage_25/centered_abs_mean": 0.1035612627863884, "signal/frontier_coverage_25/group_std_mean": 0.13709462583065032, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0018537465017288922, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0018537465017288922, "signal/frontier_coverage_5/centered_abs_mean": 0.16583755910396575, "signal/frontier_coverage_5/group_std_mean": 0.21883132457733154, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0029684923123568297, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0029684923123568297, "signal/frontier_ece_reward/centered_abs_mean": 0.009674718603491783, "signal/frontier_ece_reward/group_std_mean": 0.012150035984814168, "signal/frontier_ece_reward/group_zero_std_frac": 0.003125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001209339825436473, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001209339825436473, "step": 175 }, { "calibration/aurc": 0.30044665717816926, "calibration/batch_distribution_entropy": 0.9354879991939942, "calibration/buffer_distribution_entropy": 0.9569462730506967, "calibration/confidence_entropy": 0.4156559235153006, "calibration/coverage@0%": 0.028567606409001956, "calibration/coverage@1%": 0.028567606409001956, "calibration/coverage@10%": 0.1552845217710372, "calibration/coverage@15%": 0.2494557240704501, "calibration/coverage@20%": 0.32019019080234834, "calibration/coverage@25%": 0.40653971991193744, "calibration/coverage@30%": 0.5010977250489237, "calibration/coverage@5%": 0.061052929305283755, "calibration/ece": 0.09349101345519373, "calibration/mean_confidence": 0.5241195313734912, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000390625, "completions/max_length": 1106.0, "completions/max_terminated_length": 509.2, "completions/mean_length": 225.293359375, "completions/mean_terminated_length": 224.78112182617187, "completions/min_length": 103.8, "completions/min_terminated_length": 103.8, "epoch": 0.576, "grad_norm": 0.0006911300006322563, "learning_rate": 1e-06, "loss": 0.0012, "num_tokens": 609599010.0, "reward": 0.9900464415550232, "reward_std": 0.0712356612086296, "rewards/accuracy_reward": 0.51416015625, "rewards/brier_reward": 0.7882812857627869, "rewards/confidence_uniqueness_reward": 0.949445104598999, "rewards/format_reward": 0.999609375, "rewards/frontier_aurc_reward": -0.0031779037788510324, "rewards/frontier_coverage_1": 0.14884960055351257, "rewards/frontier_coverage_10": 0.14884960055351257, "rewards/frontier_coverage_15": 0.14884960055351257, "rewards/frontier_coverage_20": 0.14380609542131423, "rewards/frontier_coverage_25": 0.09822984933853149, "rewards/frontier_coverage_5": 0.14884960055351257, "rewards/frontier_ece_reward": 0.008101735450327396, "signal/accuracy_reward/centered_abs_mean": 0.080340576171875, "signal/accuracy_reward/group_std_mean": 0.1135935753583908, "signal/accuracy_reward/group_zero_std_frac": 0.646875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0401702880859375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0401702880859375, "signal/advantage_abs_mean": 0.052441304177045824, "signal/advantage_pre_scale_abs_mean": 0.052441304177045824, "signal/advantage_pre_scale_std": 0.09616598784923554, "signal/advantage_std": 0.09616598784923554, "signal/brier_reward/centered_abs_mean": 0.12765369713306426, "signal/brier_reward/group_std_mean": 0.16434176564216613, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015956712141633033, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.015956712141633033, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02433336600661278, "signal/confidence_uniqueness_reward/group_std_mean": 0.032431261241436006, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0030416707508265973, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0030416707508265973, "signal/format_reward/centered_abs_mean": 0.0007568359375, "signal/format_reward/group_std_mean": 0.0022097086533904076, "signal/format_reward/group_zero_std_frac": 0.9875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00037841796875, "signal/frontier_aurc_reward/centered_abs_mean": 0.002604444185271859, "signal/frontier_aurc_reward/group_std_mean": 0.004322615498676896, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.6619550994364543e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.6619550994364543e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.16047695577144622, "signal/frontier_coverage_1/group_std_mean": 0.20908625721931456, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002872537402436137, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002872537402436137, "signal/frontier_coverage_10/centered_abs_mean": 0.16047695577144622, "signal/frontier_coverage_10/group_std_mean": 0.20908625721931456, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002872537402436137, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002872537402436137, "signal/frontier_coverage_15/centered_abs_mean": 0.16047695577144622, "signal/frontier_coverage_15/group_std_mean": 0.20908625721931456, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002872537402436137, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002872537402436137, "signal/frontier_coverage_20/centered_abs_mean": 0.15487791895866393, "signal/frontier_coverage_20/group_std_mean": 0.20193188786506652, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0027723145671188832, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0027723145671188832, "signal/frontier_coverage_25/centered_abs_mean": 0.10067833811044694, "signal/frontier_coverage_25/group_std_mean": 0.13129209876060485, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0018021421507000922, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0018021421507000922, "signal/frontier_coverage_5/centered_abs_mean": 0.16047695577144622, "signal/frontier_coverage_5/group_std_mean": 0.20908625721931456, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002872537402436137, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002872537402436137, "signal/frontier_ece_reward/centered_abs_mean": 0.00897240024060011, "signal/frontier_ece_reward/group_std_mean": 0.011299080029129983, "signal/frontier_ece_reward/group_zero_std_frac": 0.009375, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0011215500300750137, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0011215500300750137, "step": 180 }, { "calibration/aurc": 0.3020793610970498, "calibration/batch_distribution_entropy": 0.9420785344993752, "calibration/buffer_distribution_entropy": 0.9564569849512502, "calibration/confidence_entropy": 0.4208254895055754, "calibration/coverage@0%": 0.01875, "calibration/coverage@1%": 0.01875, "calibration/coverage@10%": 0.18203125, "calibration/coverage@15%": 0.294921875, "calibration/coverage@20%": 0.446875, "calibration/coverage@25%": 0.508984375, "calibration/coverage@30%": 0.58203125, "calibration/coverage@5%": 0.11015625, "calibration/ece": 0.147133828125, "calibration/mean_confidence": 0.5008921875000001, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 701.4, "completions/max_terminated_length": 701.4, "completions/mean_length": 226.38173828125, "completions/mean_terminated_length": 226.38173828125, "completions/min_length": 103.8, "completions/min_terminated_length": 103.8, "epoch": 0.592, "grad_norm": 0.0008963316213339567, "learning_rate": 1e-06, "loss": 0.0003, "num_tokens": 627084871.0, "reward": 0.9920501828193664, "reward_std": 0.07705037146806717, "rewards/accuracy_reward": 0.51953125, "rewards/brier_reward": 0.7879927754402161, "rewards/confidence_uniqueness_reward": 0.9459659218788147, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.0027553184889256956, "rewards/frontier_coverage_1": 0.14658626317977905, "rewards/frontier_coverage_10": 0.14658626317977905, "rewards/frontier_coverage_15": 0.14658626317977905, "rewards/frontier_coverage_20": 0.1375451296567917, "rewards/frontier_coverage_25": 0.0929755374789238, "rewards/frontier_coverage_5": 0.14658626317977905, "rewards/frontier_ece_reward": 0.008127694483846426, "signal/accuracy_reward/centered_abs_mean": 0.10269775390625, "signal/accuracy_reward/group_std_mean": 0.13930981755256652, "signal/accuracy_reward/group_zero_std_frac": 0.584375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.051348876953125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.051348876953125, "signal/advantage_abs_mean": 0.05754327178001404, "signal/advantage_pre_scale_abs_mean": 0.05754327178001404, "signal/advantage_pre_scale_std": 0.10225750654935836, "signal/advantage_std": 0.10225750654935836, "signal/brier_reward/centered_abs_mean": 0.12910159975290297, "signal/brier_reward/group_std_mean": 0.16693442165851594, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01613769996911287, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01613769996911287, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02787396013736725, "signal/confidence_uniqueness_reward/group_std_mean": 0.03567564189434051, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003484245017170906, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003484245017170906, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.002311707567423582, "signal/frontier_aurc_reward/group_std_mean": 0.0038075320888310673, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.137956348131411e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.137956348131411e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.17492155730724335, "signal/frontier_coverage_1/group_std_mean": 0.22647031247615815, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003131095739081502, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003131095739081502, "signal/frontier_coverage_10/centered_abs_mean": 0.17492155730724335, "signal/frontier_coverage_10/group_std_mean": 0.22647031247615815, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003131095739081502, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003131095739081502, "signal/frontier_coverage_15/centered_abs_mean": 0.17492155730724335, "signal/frontier_coverage_15/group_std_mean": 0.22647031247615815, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003131095739081502, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003131095739081502, "signal/frontier_coverage_20/centered_abs_mean": 0.16214913129806519, "signal/frontier_coverage_20/group_std_mean": 0.21021865606307982, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0029024692717939614, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0029024692717939614, "signal/frontier_coverage_25/centered_abs_mean": 0.10388463884592056, "signal/frontier_coverage_25/group_std_mean": 0.13509499579668044, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0018595349509268999, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0018595349509268999, "signal/frontier_coverage_5/centered_abs_mean": 0.17492155730724335, "signal/frontier_coverage_5/group_std_mean": 0.22647031247615815, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003131095739081502, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003131095739081502, "signal/frontier_ece_reward/centered_abs_mean": 0.008527814783155918, "signal/frontier_ece_reward/group_std_mean": 0.010785996168851852, "signal/frontier_ece_reward/group_zero_std_frac": 0.00625, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010659768478944898, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010659768478944898, "step": 185 }, { "calibration/aurc": 0.21089239176890442, "calibration/batch_distribution_entropy": 0.9109937668709096, "calibration/buffer_distribution_entropy": 0.9564261109380905, "calibration/confidence_entropy": 0.3950107808886204, "calibration/coverage@0%": 0.0734375, "calibration/coverage@1%": 0.076953125, "calibration/coverage@10%": 0.319140625, "calibration/coverage@15%": 0.424609375, "calibration/coverage@20%": 0.533203125, "calibration/coverage@25%": 0.6421875, "calibration/coverage@30%": 0.71328125, "calibration/coverage@5%": 0.214453125, "calibration/ece": 0.10328154428114192, "calibration/mean_confidence": 0.4780747057188581, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 706.4, "completions/max_terminated_length": 508.2, "completions/mean_length": 229.04130859375, "completions/mean_terminated_length": 228.91400146484375, "completions/min_length": 110.6, "completions/min_terminated_length": 110.6, "epoch": 0.608, "grad_norm": 0.000727064092643559, "learning_rate": 1e-06, "loss": 0.0005, "num_tokens": 644429742.0, "reward": 0.9991624474525451, "reward_std": 0.0650356225669384, "rewards/accuracy_reward": 0.517578125, "rewards/brier_reward": 0.82053302526474, "rewards/confidence_uniqueness_reward": 0.9456512808799744, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.0022411981131881475, "rewards/frontier_coverage_1": 0.18771364390850068, "rewards/frontier_coverage_10": 0.18771364390850068, "rewards/frontier_coverage_15": 0.18771364390850068, "rewards/frontier_coverage_20": 0.17165526747703552, "rewards/frontier_coverage_25": 0.1158706158399582, "rewards/frontier_coverage_5": 0.18771364390850068, "rewards/frontier_ece_reward": 0.00920899622142315, "signal/accuracy_reward/centered_abs_mean": 0.0880126953125, "signal/accuracy_reward/group_std_mean": 0.12031828612089157, "signal/accuracy_reward/group_zero_std_frac": 0.640625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04400634765625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04400634765625, "signal/advantage_abs_mean": 0.0479205884039402, "signal/advantage_pre_scale_abs_mean": 0.0479205884039402, "signal/advantage_pre_scale_std": 0.08914662450551987, "signal/advantage_std": 0.08914662450551987, "signal/brier_reward/centered_abs_mean": 0.1190482184290886, "signal/brier_reward/group_std_mean": 0.154274782538414, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014881027303636074, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.014881027303636074, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.026865917071700097, "signal/confidence_uniqueness_reward/group_std_mean": 0.03425132632255554, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003358239633962512, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003358239633962512, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.001823417330160737, "signal/frontier_aurc_reward/group_std_mean": 0.0030636247247457504, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.2639168421155776e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.2639168421155776e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1752112627029419, "signal/frontier_coverage_1/group_std_mean": 0.224535670876503, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0031362815760076048, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0031362815760076048, "signal/frontier_coverage_10/centered_abs_mean": 0.1752112627029419, "signal/frontier_coverage_10/group_std_mean": 0.224535670876503, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0031362815760076048, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0031362815760076048, "signal/frontier_coverage_15/centered_abs_mean": 0.1752112627029419, "signal/frontier_coverage_15/group_std_mean": 0.224535670876503, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0031362815760076048, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0031362815760076048, "signal/frontier_coverage_20/centered_abs_mean": 0.15818937122821808, "signal/frontier_coverage_20/group_std_mean": 0.20266130268573762, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0028315896168351175, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0028315896168351175, "signal/frontier_coverage_25/centered_abs_mean": 0.10065800696611404, "signal/frontier_coverage_25/group_std_mean": 0.12867961823940277, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0018017783062532545, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0018017783062532545, "signal/frontier_coverage_5/centered_abs_mean": 0.1752112627029419, "signal/frontier_coverage_5/group_std_mean": 0.224535670876503, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0031362815760076048, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0031362815760076048, "signal/frontier_ece_reward/centered_abs_mean": 0.00798153392970562, "signal/frontier_ece_reward/group_std_mean": 0.009954988211393356, "signal/frontier_ece_reward/group_zero_std_frac": 0.015625, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009976917412132026, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009976917412132026, "step": 190 }, { "calibration/aurc": 0.24885868636965697, "calibration/batch_distribution_entropy": 0.953152157528556, "calibration/buffer_distribution_entropy": 0.9558545475314965, "calibration/confidence_entropy": 0.43321819447817606, "calibration/coverage@0%": 0.0109375, "calibration/coverage@1%": 0.0109375, "calibration/coverage@10%": 0.09375, "calibration/coverage@15%": 0.31640625, "calibration/coverage@20%": 0.433984375, "calibration/coverage@25%": 0.559765625, "calibration/coverage@30%": 0.64765625, "calibration/coverage@5%": 0.024609375, "calibration/ece": 0.10506215171912128, "calibration/mean_confidence": 0.4945223541320223, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 982.2, "completions/max_terminated_length": 773.0, "completions/mean_length": 233.69580078125, "completions/mean_terminated_length": 233.56867065429688, "completions/min_length": 113.2, "completions/min_terminated_length": 113.2, "epoch": 0.624, "grad_norm": 0.0008362337248399854, "learning_rate": 1e-06, "loss": 0.0003, "num_tokens": 662166691.0, "reward": 1.0022923231124878, "reward_std": 0.07647469788789749, "rewards/accuracy_reward": 0.5296875, "rewards/brier_reward": 0.8107707142829895, "rewards/confidence_uniqueness_reward": 0.9529690384864807, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.0024412672501057386, "rewards/frontier_coverage_1": 0.16334131360054016, "rewards/frontier_coverage_10": 0.16334131360054016, "rewards/frontier_coverage_15": 0.16334131360054016, "rewards/frontier_coverage_20": 0.15014731287956237, "rewards/frontier_coverage_25": 0.0971300944685936, "rewards/frontier_coverage_5": 0.16334131360054016, "rewards/frontier_ece_reward": 0.007617098093032837, "signal/accuracy_reward/centered_abs_mean": 0.103271484375, "signal/accuracy_reward/group_std_mean": 0.1352292686700821, "signal/accuracy_reward/group_zero_std_frac": 0.6125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0516357421875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0516357421875, "signal/advantage_abs_mean": 0.058700380474328996, "signal/advantage_pre_scale_abs_mean": 0.058700380474328996, "signal/advantage_pre_scale_std": 0.10284461975097656, "signal/advantage_std": 0.10284461975097656, "signal/brier_reward/centered_abs_mean": 0.1295202523469925, "signal/brier_reward/group_std_mean": 0.16612099409103392, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01619003154337406, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01619003154337406, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.022126782685518265, "signal/confidence_uniqueness_reward/group_std_mean": 0.028393551334738733, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002765847835689783, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002765847835689783, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.0020905883517116307, "signal/frontier_aurc_reward/group_std_mean": 0.003407838987186551, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.742152948689181e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.742152948689181e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.17640204429626466, "signal/frontier_coverage_1/group_std_mean": 0.22737206220626832, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003157596383243799, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003157596383243799, "signal/frontier_coverage_10/centered_abs_mean": 0.17640204429626466, "signal/frontier_coverage_10/group_std_mean": 0.22737206220626832, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003157596383243799, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003157596383243799, "signal/frontier_coverage_15/centered_abs_mean": 0.17640204429626466, "signal/frontier_coverage_15/group_std_mean": 0.22737206220626832, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003157596383243799, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003157596383243799, "signal/frontier_coverage_20/centered_abs_mean": 0.15503880083560945, "signal/frontier_coverage_20/group_std_mean": 0.20045762360095978, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002775194449350238, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002775194449350238, "signal/frontier_coverage_25/centered_abs_mean": 0.09230681955814361, "signal/frontier_coverage_25/group_std_mean": 0.12007313668727874, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0016522919991984963, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016522919991984963, "signal/frontier_coverage_5/centered_abs_mean": 0.17640204429626466, "signal/frontier_coverage_5/group_std_mean": 0.22737206220626832, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003157596383243799, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003157596383243799, "signal/frontier_ece_reward/centered_abs_mean": 0.0074423874728381635, "signal/frontier_ece_reward/group_std_mean": 0.009387831203639507, "signal/frontier_ece_reward/group_zero_std_frac": 0.003125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009302984341047704, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009302984341047704, "step": 195 }, { "calibration/aurc": 0.27366786661512593, "calibration/batch_distribution_entropy": 0.9267823537685029, "calibration/buffer_distribution_entropy": 0.9562173422993376, "calibration/confidence_entropy": 0.43266101942321056, "calibration/coverage@0%": 0.033594514432485324, "calibration/coverage@1%": 0.05586013943248532, "calibration/coverage@10%": 0.2957038894324853, "calibration/coverage@15%": 0.3343757644324853, "calibration/coverage@20%": 0.4261726394324853, "calibration/coverage@25%": 0.4910163894324853, "calibration/coverage@30%": 0.5804695144324853, "calibration/coverage@5%": 0.2148445144324853, "calibration/ece": 0.1776985742252802, "calibration/mean_confidence": 0.5747919124483183, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0005859375, "completions/max_length": 1343.6, "completions/max_terminated_length": 643.4, "completions/mean_length": 236.0283203125, "completions/mean_terminated_length": 235.26619873046874, "completions/min_length": 111.2, "completions/min_terminated_length": 111.2, "epoch": 0.64, "grad_norm": 0.0007717712433077395, "learning_rate": 1e-06, "loss": 0.0015, "num_tokens": 679926309.0, "reward": 1.0139172077178955, "reward_std": 0.073719322681427, "rewards/accuracy_reward": 0.56474609375, "rewards/brier_reward": 0.801562488079071, "rewards/confidence_uniqueness_reward": 0.9502385973930358, "rewards/format_reward": 0.99931640625, "rewards/frontier_aurc_reward": -0.002655105572193861, "rewards/frontier_coverage_1": 0.12261157184839248, "rewards/frontier_coverage_10": 0.12261157184839248, "rewards/frontier_coverage_15": 0.12261157184839248, "rewards/frontier_coverage_20": 0.10896739810705185, "rewards/frontier_coverage_25": 0.07364076673984528, "rewards/frontier_coverage_5": 0.12261157184839248, "rewards/frontier_ece_reward": 0.007285200804471969, "signal/accuracy_reward/centered_abs_mean": 0.089215087890625, "signal/accuracy_reward/group_std_mean": 0.11973312497138977, "signal/accuracy_reward/group_zero_std_frac": 0.65, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0446075439453125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0446075439453125, "signal/advantage_abs_mean": 0.05503489300608635, "signal/advantage_pre_scale_abs_mean": 0.05503489300608635, "signal/advantage_pre_scale_std": 0.10273861885070801, "signal/advantage_std": 0.10273861885070801, "signal/brier_reward/centered_abs_mean": 0.121430304646492, "signal/brier_reward/group_std_mean": 0.15940046012401582, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0151787880808115, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.0151787880808115, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.024930314719676973, "signal/confidence_uniqueness_reward/group_std_mean": 0.03353976756334305, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0031162893399596216, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0031162893399596216, "signal/format_reward/centered_abs_mean": 0.001312255859375, "signal/format_reward/group_std_mean": 0.0035306816454976795, "signal/format_reward/group_zero_std_frac": 0.98125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006561279296875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0006561279296875, "signal/frontier_aurc_reward/centered_abs_mean": 0.0025112688075751067, "signal/frontier_aurc_reward/group_std_mean": 0.0040036571677774194, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.495171051530633e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.495171051530633e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.14729901850223542, "signal/frontier_coverage_1/group_std_mean": 0.1960638076066971, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002636652393266559, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002636652393266559, "signal/frontier_coverage_10/centered_abs_mean": 0.14729901850223542, "signal/frontier_coverage_10/group_std_mean": 0.1960638076066971, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002636652393266559, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002636652393266559, "signal/frontier_coverage_15/centered_abs_mean": 0.14729901850223542, "signal/frontier_coverage_15/group_std_mean": 0.1960638076066971, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002636652393266559, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002636652393266559, "signal/frontier_coverage_20/centered_abs_mean": 0.1258530229330063, "signal/frontier_coverage_20/group_std_mean": 0.16801635324954986, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0022527690045535563, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0022527690045535563, "signal/frontier_coverage_25/centered_abs_mean": 0.07607003599405289, "signal/frontier_coverage_25/group_std_mean": 0.10135896503925323, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013616536045446992, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013616536045446992, "signal/frontier_coverage_5/centered_abs_mean": 0.14729901850223542, "signal/frontier_coverage_5/group_std_mean": 0.1960638076066971, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002636652393266559, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002636652393266559, "signal/frontier_ece_reward/centered_abs_mean": 0.00694213742390275, "signal/frontier_ece_reward/group_std_mean": 0.008880362659692765, "signal/frontier_ece_reward/group_zero_std_frac": 0.009375, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0008677671779878437, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0008677671779878437, "step": 200 }, { "epoch": 0.64, "eval_calibration/aurc": 0.4512389389889342, "eval_calibration/batch_distribution_entropy": 0.8900863453854657, "eval_calibration/buffer_distribution_entropy": 0.956610375342669, "eval_calibration/confidence_entropy": 0.41897089379700764, "eval_calibration/coverage@0%": 0.046875, "eval_calibration/coverage@1%": 0.046875, "eval_calibration/coverage@10%": 0.046875, "eval_calibration/coverage@15%": 0.046875, "eval_calibration/coverage@20%": 0.078125, "eval_calibration/coverage@25%": 0.234375, "eval_calibration/coverage@30%": 0.296875, "eval_calibration/coverage@5%": 0.046875, "eval_calibration/ece": 0.22234375, "eval_calibration/mean_confidence": 0.50015625, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 424.25, "eval_completions/max_terminated_length": 424.25, "eval_completions/mean_length": 236.86260604858398, "eval_completions/mean_terminated_length": 236.86260604858398, "eval_completions/min_length": 140.25, "eval_completions/min_terminated_length": 140.25, "eval_loss": 0.0, "eval_num_tokens": 679926309.0, "eval_reward": 0.945796549320221, "eval_reward_std": 0.23248660191893578, "eval_rewards/accuracy_reward": 0.42578125, "eval_rewards/brier_reward": 0.7914303839206696, "eval_rewards/confidence_uniqueness_reward": 0.900390625, "eval_rewards/format_reward": 1.0, "eval_rewards/frontier_aurc_reward": -0.0042260364862158895, "eval_rewards/frontier_coverage_1": 0.2153126746416092, "eval_rewards/frontier_coverage_10": 0.2153126746416092, "eval_rewards/frontier_coverage_15": 0.2153126746416092, "eval_rewards/frontier_coverage_20": 0.18361878022551537, "eval_rewards/frontier_coverage_25": 0.10805939510464668, "eval_rewards/frontier_coverage_5": 0.2153126746416092, "eval_rewards/frontier_ece_reward": 0.006932097370736301, "eval_runtime": 22.0641, "eval_samples_per_second": 22.661, "eval_signal/accuracy_reward/centered_abs_mean": 0.476318359375, "eval_signal/accuracy_reward/group_std_mean": 0.4956294521689415, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2381591796875, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2381591796875, "eval_signal/advantage_abs_mean": 0.21480223909020424, "eval_signal/advantage_pre_scale_abs_mean": 0.21480223909020424, "eval_signal/advantage_pre_scale_std": 0.2299434170126915, "eval_signal/advantage_std": 0.2299434170126915, "eval_signal/brier_reward/centered_abs_mean": 0.2213696613907814, "eval_signal/brier_reward/group_std_mean": 0.27776212990283966, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.027671207673847675, "eval_signal/brier_reward/weight": 0.125, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.027671207673847675, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0403900146484375, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.04716748744249344, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0050487518310546875, "eval_signal/confidence_uniqueness_reward/weight": 0.125, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0050487518310546875, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0055589916300959885, "eval_signal/frontier_aurc_reward/group_std_mean": 0.010654692072421312, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 9.950594630936394e-05, "eval_signal/frontier_aurc_reward/weight": 0.017899999395012856, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 9.950594630936394e-05, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.37191175669431686, "eval_signal/frontier_coverage_1/group_std_mean": 0.4545610621571541, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.006657220306806266, "eval_signal/frontier_coverage_1/weight": 0.017899999395012856, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.006657220306806266, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.37191175669431686, "eval_signal/frontier_coverage_10/group_std_mean": 0.4545610621571541, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.006657220306806266, "eval_signal/frontier_coverage_10/weight": 0.017899999395012856, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.006657220306806266, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.37191175669431686, "eval_signal/frontier_coverage_15/group_std_mean": 0.4545610621571541, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.006657220306806266, "eval_signal/frontier_coverage_15/weight": 0.017899999395012856, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.006657220306806266, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.3148370534181595, "eval_signal/frontier_coverage_20/group_std_mean": 0.3863198012113571, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005635583191178739, "eval_signal/frontier_coverage_20/weight": 0.017899999395012856, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005635583191178739, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.17110588029026985, "eval_signal/frontier_coverage_25/group_std_mean": 0.21621626242995262, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0030627950909547508, "eval_signal/frontier_coverage_25/weight": 0.017899999395012856, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0030627950909547508, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.37191175669431686, "eval_signal/frontier_coverage_5/group_std_mean": 0.4545610621571541, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.006657220306806266, "eval_signal/frontier_coverage_5/weight": 0.017899999395012856, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.006657220306806266, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.011155220679938793, "eval_signal/frontier_ece_reward/group_std_mean": 0.013499156106263399, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0013944025849923491, "eval_signal/frontier_ece_reward/weight": 0.125, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0013944025849923491, "eval_steps_per_second": 0.181, "step": 200 }, { "calibration/aurc": 0.4215608850322955, "calibration/batch_distribution_entropy": 0.9583672438505666, "calibration/buffer_distribution_entropy": 0.9569867028799818, "calibration/confidence_entropy": 0.4560377670341033, "calibration/coverage@0%": 0.000390625, "calibration/coverage@1%": 0.000390625, "calibration/coverage@10%": 0.000390625, "calibration/coverage@15%": 0.039093077299412915, "calibration/coverage@20%": 0.11807424168297456, "calibration/coverage@25%": 0.17905913649706456, "calibration/coverage@30%": 0.2486217282289628, "calibration/coverage@5%": 0.000390625, "calibration/ece": 0.1416412763258362, "calibration/mean_confidence": 0.527144841838013, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 738.6, "completions/max_terminated_length": 543.8, "completions/mean_length": 234.63759765625, "completions/mean_terminated_length": 234.51047973632814, "completions/min_length": 108.2, "completions/min_terminated_length": 108.2, "epoch": 0.656, "grad_norm": 0.0008832156891003251, "learning_rate": 1e-06, "loss": 0.0003, "num_tokens": 697185542.0, "reward": 0.9804197549819946, "reward_std": 0.07824952602386474, "rewards/accuracy_reward": 0.49775390625, "rewards/brier_reward": 0.780112111568451, "rewards/confidence_uniqueness_reward": 0.9548117399215699, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.0036599119659513233, "rewards/frontier_coverage_1": 0.14614389687776566, "rewards/frontier_coverage_10": 0.14614389687776566, "rewards/frontier_coverage_15": 0.14614389687776566, "rewards/frontier_coverage_20": 0.12369791716337204, "rewards/frontier_coverage_25": 0.07793211117386818, "rewards/frontier_coverage_5": 0.14614389687776566, "rewards/frontier_ece_reward": 0.006139390263706445, "signal/accuracy_reward/centered_abs_mean": 0.096759033203125, "signal/accuracy_reward/group_std_mean": 0.12834607511758805, "signal/accuracy_reward/group_zero_std_frac": 0.63125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0483795166015625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0483795166015625, "signal/advantage_abs_mean": 0.06001611351966858, "signal/advantage_pre_scale_abs_mean": 0.06001611351966858, "signal/advantage_pre_scale_std": 0.107490074634552, "signal/advantage_std": 0.107490074634552, "signal/brier_reward/centered_abs_mean": 0.13132331371307374, "signal/brier_reward/group_std_mean": 0.16742262840270997, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016415414214134217, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.016415414214134217, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.020241304486989974, "signal/confidence_uniqueness_reward/group_std_mean": 0.0267801396548748, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002530163060873747, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002530163060873747, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0032942437566816805, "signal/frontier_aurc_reward/group_std_mean": 0.005665683187544346, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.8966961660189554e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.8966961660189554e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.15632550716400145, "signal/frontier_coverage_1/group_std_mean": 0.2049040824174881, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002798226475715637, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002798226475715637, "signal/frontier_coverage_10/centered_abs_mean": 0.15632550716400145, "signal/frontier_coverage_10/group_std_mean": 0.2049040824174881, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002798226475715637, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002798226475715637, "signal/frontier_coverage_15/centered_abs_mean": 0.15632550716400145, "signal/frontier_coverage_15/group_std_mean": 0.2049040824174881, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002798226475715637, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002798226475715637, "signal/frontier_coverage_20/centered_abs_mean": 0.13002455830574036, "signal/frontier_coverage_20/group_std_mean": 0.17081713378429414, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0023274393985047936, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0023274393985047936, "signal/frontier_coverage_25/centered_abs_mean": 0.07792463153600693, "signal/frontier_coverage_25/group_std_mean": 0.10254463851451874, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001394850853830576, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001394850853830576, "signal/frontier_coverage_5/centered_abs_mean": 0.15632550716400145, "signal/frontier_coverage_5/group_std_mean": 0.2049040824174881, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002798226475715637, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002798226475715637, "signal/frontier_ece_reward/centered_abs_mean": 0.00685331979766488, "signal/frontier_ece_reward/group_std_mean": 0.00863857101649046, "signal/frontier_ece_reward/group_zero_std_frac": 0.00625, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00085666497470811, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00085666497470811, "step": 205 }, { "calibration/aurc": 0.2770882140684193, "calibration/batch_distribution_entropy": 0.9175148206245929, "calibration/buffer_distribution_entropy": 0.9586387820511565, "calibration/confidence_entropy": 0.41085941712176605, "calibration/coverage@0%": 0.02890625, "calibration/coverage@1%": 0.052734375, "calibration/coverage@10%": 0.158203125, "calibration/coverage@15%": 0.2578125, "calibration/coverage@20%": 0.32734375, "calibration/coverage@25%": 0.405859375, "calibration/coverage@30%": 0.5078125, "calibration/coverage@5%": 0.070703125, "calibration/ece": 0.15999164143747102, "calibration/mean_confidence": 0.5309644159883717, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 772.2, "completions/max_terminated_length": 588.8, "completions/mean_length": 234.79482421875, "completions/mean_terminated_length": 234.66750793457032, "completions/min_length": 108.6, "completions/min_terminated_length": 108.6, "epoch": 0.672, "grad_norm": 0.0007301148143596947, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 714503281.0, "reward": 0.9924185633659363, "reward_std": 0.07214737236499787, "rewards/accuracy_reward": 0.5154296875, "rewards/brier_reward": 0.7984122276306153, "rewards/confidence_uniqueness_reward": 0.9413475751876831, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.0031860186252743007, "rewards/frontier_coverage_1": 0.1705209881067276, "rewards/frontier_coverage_10": 0.1705209881067276, "rewards/frontier_coverage_15": 0.1705209881067276, "rewards/frontier_coverage_20": 0.1439062923192978, "rewards/frontier_coverage_25": 0.09587938338518143, "rewards/frontier_coverage_5": 0.1705209881067276, "rewards/frontier_ece_reward": 0.007095560338348151, "signal/accuracy_reward/centered_abs_mean": 0.09951171875, "signal/accuracy_reward/group_std_mean": 0.130070324242115, "signal/accuracy_reward/group_zero_std_frac": 0.6375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.049755859375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.049755859375, "signal/advantage_abs_mean": 0.054243403673172, "signal/advantage_pre_scale_abs_mean": 0.054243403673172, "signal/advantage_pre_scale_std": 0.09895178079605102, "signal/advantage_std": 0.09895178079605102, "signal/brier_reward/centered_abs_mean": 0.12464683204889297, "signal/brier_reward/group_std_mean": 0.16111274659633637, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015580854006111622, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.015580854006111622, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.029499797150492668, "signal/confidence_uniqueness_reward/group_std_mean": 0.03853048831224441, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0036874746438115835, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0036874746438115835, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.002818222576752305, "signal/frontier_aurc_reward/group_std_mean": 0.004558488540351391, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.044618155807257e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.044618155807257e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.16789944767951964, "signal/frontier_coverage_1/group_std_mean": 0.21639321148395538, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0030053998343646526, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0030053998343646526, "signal/frontier_coverage_10/centered_abs_mean": 0.16789944767951964, "signal/frontier_coverage_10/group_std_mean": 0.21639321148395538, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0030053998343646526, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0030053998343646526, "signal/frontier_coverage_15/centered_abs_mean": 0.16789944767951964, "signal/frontier_coverage_15/group_std_mean": 0.21639321148395538, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0030053998343646526, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0030053998343646526, "signal/frontier_coverage_20/centered_abs_mean": 0.13216465711593628, "signal/frontier_coverage_20/group_std_mean": 0.1715537875890732, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0023657473269850017, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0023657473269850017, "signal/frontier_coverage_25/centered_abs_mean": 0.08391138613224029, "signal/frontier_coverage_25/group_std_mean": 0.10859294980764389, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015020138118416071, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015020138118416071, "signal/frontier_coverage_5/centered_abs_mean": 0.16789944767951964, "signal/frontier_coverage_5/group_std_mean": 0.21639321148395538, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0030053998343646526, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0030053998343646526, "signal/frontier_ece_reward/centered_abs_mean": 0.006560100056231022, "signal/frontier_ece_reward/group_std_mean": 0.008255177177488804, "signal/frontier_ece_reward/group_zero_std_frac": 0.015625, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0008200125070288777, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0008200125070288777, "step": 210 }, { "calibration/aurc": 0.3324319783208281, "calibration/batch_distribution_entropy": 0.9320277946427449, "calibration/buffer_distribution_entropy": 0.9584549993018816, "calibration/confidence_entropy": 0.42771008132219396, "calibration/coverage@0%": 0.01917196673189824, "calibration/coverage@1%": 0.01917196673189824, "calibration/coverage@10%": 0.09771052470645793, "calibration/coverage@15%": 0.1715569960861057, "calibration/coverage@20%": 0.2720125978473581, "calibration/coverage@25%": 0.4049130075831703, "calibration/coverage@30%": 0.6006367722602739, "calibration/coverage@5%": 0.01917196673189824, "calibration/ece": 0.14269236415772915, "calibration/mean_confidence": 0.507278526168631, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 735.4, "completions/max_terminated_length": 545.2, "completions/mean_length": 236.37587890625, "completions/mean_terminated_length": 236.2491943359375, "completions/min_length": 109.8, "completions/min_terminated_length": 109.8, "epoch": 0.688, "grad_norm": 0.0009615990566089749, "learning_rate": 1e-06, "loss": 0.0003, "num_tokens": 731877690.0, "reward": 0.9984089136123657, "reward_std": 0.07327373176813126, "rewards/accuracy_reward": 0.528515625, "rewards/brier_reward": 0.800303041934967, "rewards/confidence_uniqueness_reward": 0.9471487998962402, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.0026645056437700986, "rewards/frontier_coverage_1": 0.1580977201461792, "rewards/frontier_coverage_10": 0.1580977201461792, "rewards/frontier_coverage_15": 0.1580977201461792, "rewards/frontier_coverage_20": 0.12280210703611374, "rewards/frontier_coverage_25": 0.08494900315999984, "rewards/frontier_coverage_5": 0.1580977201461792, "rewards/frontier_ece_reward": 0.006611639633774757, "signal/accuracy_reward/centered_abs_mean": 0.10098876953125, "signal/accuracy_reward/group_std_mean": 0.1342798501253128, "signal/accuracy_reward/group_zero_std_frac": 0.609375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.050494384765625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.050494384765625, "signal/advantage_abs_mean": 0.055242912471294404, "signal/advantage_pre_scale_abs_mean": 0.055242912471294404, "signal/advantage_pre_scale_std": 0.10234064608812332, "signal/advantage_std": 0.10234064608812332, "signal/brier_reward/centered_abs_mean": 0.12077709585428238, "signal/brier_reward/group_std_mean": 0.15675756931304932, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015097136981785298, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.015097136981785298, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.025125860422849654, "signal/confidence_uniqueness_reward/group_std_mean": 0.032760153710842135, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003140732552856207, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003140732552856207, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.002273104293271899, "signal/frontier_aurc_reward/group_std_mean": 0.0036782562732696534, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.068856578669511e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.068856578669511e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.16195787191390992, "signal/frontier_coverage_1/group_std_mean": 0.20971050560474397, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002899045730009675, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002899045730009675, "signal/frontier_coverage_10/centered_abs_mean": 0.16195787191390992, "signal/frontier_coverage_10/group_std_mean": 0.20971050560474397, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002899045730009675, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002899045730009675, "signal/frontier_coverage_15/centered_abs_mean": 0.16195787191390992, "signal/frontier_coverage_15/group_std_mean": 0.20971050560474397, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002899045730009675, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002899045730009675, "signal/frontier_coverage_20/centered_abs_mean": 0.12282232344150543, "signal/frontier_coverage_20/group_std_mean": 0.1596350461244583, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0021985195111483336, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0021985195111483336, "signal/frontier_coverage_25/centered_abs_mean": 0.08016574680805207, "signal/frontier_coverage_25/group_std_mean": 0.10362372994422912, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014349668985232712, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014349668985232712, "signal/frontier_coverage_5/centered_abs_mean": 0.16195787191390992, "signal/frontier_coverage_5/group_std_mean": 0.20971050560474397, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002899045730009675, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002899045730009675, "signal/frontier_ece_reward/centered_abs_mean": 0.006114064436405897, "signal/frontier_ece_reward/group_std_mean": 0.007758526690304279, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007642580545507372, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007642580545507372, "step": 215 }, { "calibration/aurc": 0.26502404014152814, "calibration/batch_distribution_entropy": 0.9127524752143842, "calibration/buffer_distribution_entropy": 0.9590662096299957, "calibration/confidence_entropy": 0.40612629682905776, "calibration/coverage@0%": 0.004689028864970646, "calibration/coverage@1%": 0.004689028864970646, "calibration/coverage@10%": 0.010559870352250488, "calibration/coverage@15%": 0.18851593077299414, "calibration/coverage@20%": 0.29834959026418784, "calibration/coverage@25%": 0.5343130809686889, "calibration/coverage@30%": 0.6917632399706457, "calibration/coverage@5%": 0.004689028864970646, "calibration/ece": 0.10851483484359577, "calibration/mean_confidence": 0.5313303860375503, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 932.4, "completions/max_terminated_length": 527.6, "completions/mean_length": 234.701171875, "completions/mean_terminated_length": 234.44721069335938, "completions/min_length": 108.4, "completions/min_terminated_length": 108.4, "epoch": 0.704, "grad_norm": 0.000645692169200629, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 749147174.0, "reward": 1.0034277796745301, "reward_std": 0.06270710080862045, "rewards/accuracy_reward": 0.53623046875, "rewards/brier_reward": 0.8096086740493774, "rewards/confidence_uniqueness_reward": 0.9504172205924988, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.0027049076044932006, "rewards/frontier_coverage_1": 0.15413620173931122, "rewards/frontier_coverage_10": 0.15413620173931122, "rewards/frontier_coverage_15": 0.15413620173931122, "rewards/frontier_coverage_20": 0.12171441465616226, "rewards/frontier_coverage_25": 0.08325019925832748, "rewards/frontier_coverage_5": 0.15413620173931122, "rewards/frontier_ece_reward": 0.006393497437238693, "signal/accuracy_reward/centered_abs_mean": 0.072747802734375, "signal/accuracy_reward/group_std_mean": 0.0994048297405243, "signal/accuracy_reward/group_zero_std_frac": 0.703125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0363739013671875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0363739013671875, "signal/advantage_abs_mean": 0.04672844707965851, "signal/advantage_pre_scale_abs_mean": 0.04672844707965851, "signal/advantage_pre_scale_std": 0.08910781294107437, "signal/advantage_std": 0.08910781294107437, "signal/brier_reward/centered_abs_mean": 0.11959185302257538, "signal/brier_reward/group_std_mean": 0.15349071621894836, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014948981627821923, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.014948981627821923, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.022796983271837233, "signal/confidence_uniqueness_reward/group_std_mean": 0.03028981201350689, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002849622908979654, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002849622908979654, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.0023922407533973457, "signal/frontier_aurc_reward/group_std_mean": 0.003952773287892342, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.282110749045387e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.282110749045387e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.15440512895584108, "signal/frontier_coverage_1/group_std_mean": 0.19809613525867462, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002763851685449481, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002763851685449481, "signal/frontier_coverage_10/centered_abs_mean": 0.15440512895584108, "signal/frontier_coverage_10/group_std_mean": 0.19809613525867462, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002763851685449481, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002763851685449481, "signal/frontier_coverage_15/centered_abs_mean": 0.15440512895584108, "signal/frontier_coverage_15/group_std_mean": 0.19809613525867462, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002763851685449481, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002763851685449481, "signal/frontier_coverage_20/centered_abs_mean": 0.11523250043392182, "signal/frontier_coverage_20/group_std_mean": 0.14851680397987366, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0020626616897061467, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0020626616897061467, "signal/frontier_coverage_25/centered_abs_mean": 0.07550591826438904, "signal/frontier_coverage_25/group_std_mean": 0.09685217291116714, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001351555879227817, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001351555879227817, "signal/frontier_coverage_5/centered_abs_mean": 0.15440512895584108, "signal/frontier_coverage_5/group_std_mean": 0.19809613525867462, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002763851685449481, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002763851685449481, "signal/frontier_ece_reward/centered_abs_mean": 0.005882252100855112, "signal/frontier_ece_reward/group_std_mean": 0.007382483780384063, "signal/frontier_ece_reward/group_zero_std_frac": 0.015625, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.000735281512606889, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.000735281512606889, "step": 220 }, { "calibration/aurc": 0.23271464846876766, "calibration/batch_distribution_entropy": 0.9358357262640282, "calibration/buffer_distribution_entropy": 0.9581519056963123, "calibration/confidence_entropy": 0.4162220078231755, "calibration/coverage@0%": 0.072265625, "calibration/coverage@1%": 0.084765625, "calibration/coverage@10%": 0.253125, "calibration/coverage@15%": 0.379296875, "calibration/coverage@20%": 0.47265625, "calibration/coverage@25%": 0.57265625, "calibration/coverage@30%": 0.667578125, "calibration/coverage@5%": 0.16796875, "calibration/ece": 0.1367582091779871, "calibration/mean_confidence": 0.5125693560602549, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 593.6, "completions/max_terminated_length": 593.6, "completions/mean_length": 237.528125, "completions/mean_terminated_length": 237.528125, "completions/min_length": 107.8, "completions/min_terminated_length": 107.8, "epoch": 0.72, "grad_norm": 0.0007646158919669688, "learning_rate": 1e-06, "loss": 0.0003, "num_tokens": 766589318.0, "reward": 1.0137495040893554, "reward_std": 0.06675339564681053, "rewards/accuracy_reward": 0.559375, "rewards/brier_reward": 0.8103640794754028, "rewards/confidence_uniqueness_reward": 0.9529289245605469, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.002305832249112427, "rewards/frontier_coverage_1": 0.13652174174785614, "rewards/frontier_coverage_10": 0.13652174174785614, "rewards/frontier_coverage_15": 0.13516611903905867, "rewards/frontier_coverage_20": 0.10529735386371612, "rewards/frontier_coverage_25": 0.07399671077728272, "rewards/frontier_coverage_5": 0.13652174174785614, "rewards/frontier_ece_reward": 0.005852967128157615, "signal/accuracy_reward/centered_abs_mean": 0.09056396484375, "signal/accuracy_reward/group_std_mean": 0.1233248084783554, "signal/accuracy_reward/group_zero_std_frac": 0.6375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.045281982421875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.045281982421875, "signal/advantage_abs_mean": 0.04960698410868645, "signal/advantage_pre_scale_abs_mean": 0.04960698410868645, "signal/advantage_pre_scale_std": 0.09404271245002746, "signal/advantage_std": 0.09404271245002746, "signal/brier_reward/centered_abs_mean": 0.1129736065864563, "signal/brier_reward/group_std_mean": 0.14554286301136016, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014121700823307038, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.014121700823307038, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02097158432006836, "signal/confidence_uniqueness_reward/group_std_mean": 0.026838266104459763, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002621448040008545, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002621448040008545, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0020008538849651814, "signal/frontier_aurc_reward/group_std_mean": 0.003258723812177777, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.581528362701647e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.581528362701647e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.15535161793231964, "signal/frontier_coverage_1/group_std_mean": 0.20127532184123992, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0027807938866317274, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027807938866317274, "signal/frontier_coverage_10/centered_abs_mean": 0.15535161793231964, "signal/frontier_coverage_10/group_std_mean": 0.20127532184123992, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0027807938866317274, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0027807938866317274, "signal/frontier_coverage_15/centered_abs_mean": 0.1520615577697754, "signal/frontier_coverage_15/group_std_mean": 0.19707475900650023, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0027219018433243037, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0027219018433243037, "signal/frontier_coverage_20/centered_abs_mean": 0.10928000509738922, "signal/frontier_coverage_20/group_std_mean": 0.14226324558258058, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0019561121007427573, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0019561121007427573, "signal/frontier_coverage_25/centered_abs_mean": 0.06993094086647034, "signal/frontier_coverage_25/group_std_mean": 0.09024198353290558, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001251763803884387, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001251763803884387, "signal/frontier_coverage_5/centered_abs_mean": 0.15535161793231964, "signal/frontier_coverage_5/group_std_mean": 0.20127532184123992, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0027807938866317274, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0027807938866317274, "signal/frontier_ece_reward/centered_abs_mean": 0.005452153272926807, "signal/frontier_ece_reward/group_std_mean": 0.00692967027425766, "signal/frontier_ece_reward/group_zero_std_frac": 0.009375, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006815191591158509, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006815191591158509, "step": 225 }, { "calibration/aurc": 0.2486706639284412, "calibration/batch_distribution_entropy": 0.9398357791188333, "calibration/buffer_distribution_entropy": 0.9573366301942062, "calibration/confidence_entropy": 0.4341578722653532, "calibration/coverage@0%": 0.012109375, "calibration/coverage@1%": 0.012109375, "calibration/coverage@10%": 0.101953125, "calibration/coverage@15%": 0.211328125, "calibration/coverage@20%": 0.461328125, "calibration/coverage@25%": 0.554296875, "calibration/coverage@30%": 0.637890625, "calibration/coverage@5%": 0.0421875, "calibration/ece": 0.12902223483594827, "calibration/mean_confidence": 0.5501059461619251, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 812.6, "completions/max_terminated_length": 639.8, "completions/mean_length": 241.2818359375, "completions/mean_terminated_length": 241.15572814941407, "completions/min_length": 108.4, "completions/min_terminated_length": 108.4, "epoch": 0.736, "grad_norm": 0.0005533373332582414, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 783999628.0, "reward": 1.0136502385139465, "reward_std": 0.06907420158386231, "rewards/accuracy_reward": 0.564453125, "rewards/brier_reward": 0.8001478791236878, "rewards/confidence_uniqueness_reward": 0.9533275842666626, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.0023767944891005754, "rewards/frontier_coverage_1": 0.12251611799001694, "rewards/frontier_coverage_10": 0.12251611799001694, "rewards/frontier_coverage_15": 0.12045876532793046, "rewards/frontier_coverage_20": 0.09563716128468513, "rewards/frontier_coverage_25": 0.06998000591993332, "rewards/frontier_coverage_5": 0.12251611799001694, "rewards/frontier_ece_reward": 0.0050458677113056185, "signal/accuracy_reward/centered_abs_mean": 0.0895751953125, "signal/accuracy_reward/group_std_mean": 0.1197155088186264, "signal/accuracy_reward/group_zero_std_frac": 0.65625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04478759765625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04478759765625, "signal/advantage_abs_mean": 0.05282713621854782, "signal/advantage_pre_scale_abs_mean": 0.05282713621854782, "signal/advantage_pre_scale_std": 0.09655779153108597, "signal/advantage_std": 0.09655779153108597, "signal/brier_reward/centered_abs_mean": 0.11980518698692322, "signal/brier_reward/group_std_mean": 0.15468465983867646, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014975648373365402, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.014975648373365402, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02088698633015156, "signal/confidence_uniqueness_reward/group_std_mean": 0.02700975351035595, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002610873291268945, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002610873291268945, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.0020824840757995844, "signal/frontier_aurc_reward/group_std_mean": 0.003554532490670681, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.727646399056539e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.727646399056539e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.15561709105968474, "signal/frontier_coverage_1/group_std_mean": 0.20363341569900512, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002785545913502574, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002785545913502574, "signal/frontier_coverage_10/centered_abs_mean": 0.15561709105968474, "signal/frontier_coverage_10/group_std_mean": 0.20363341569900512, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002785545913502574, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002785545913502574, "signal/frontier_coverage_15/centered_abs_mean": 0.15031374096870423, "signal/frontier_coverage_15/group_std_mean": 0.19690951704978943, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0026906158309429884, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026906158309429884, "signal/frontier_coverage_20/centered_abs_mean": 0.10518187284469604, "signal/frontier_coverage_20/group_std_mean": 0.13869116008281707, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0018827555235475303, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0018827555235475303, "signal/frontier_coverage_25/centered_abs_mean": 0.06872652918100357, "signal/frontier_coverage_25/group_std_mean": 0.08967986106872558, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012302048038691283, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012302048038691283, "signal/frontier_coverage_5/centered_abs_mean": 0.15561709105968474, "signal/frontier_coverage_5/group_std_mean": 0.20363341569900512, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002785545913502574, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002785545913502574, "signal/frontier_ece_reward/centered_abs_mean": 0.005454682745039463, "signal/frontier_ece_reward/group_std_mean": 0.006951323244720697, "signal/frontier_ece_reward/group_zero_std_frac": 0.015625, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006818353431299329, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006818353431299329, "step": 230 }, { "calibration/aurc": 0.26921427859740704, "calibration/batch_distribution_entropy": 0.9213935076713792, "calibration/buffer_distribution_entropy": 0.9566270992945431, "calibration/confidence_entropy": 0.4192143462989858, "calibration/coverage@0%": 0.00234375, "calibration/coverage@1%": 0.00234375, "calibration/coverage@10%": 0.1828125, "calibration/coverage@15%": 0.2375, "calibration/coverage@20%": 0.41640625, "calibration/coverage@25%": 0.540625, "calibration/coverage@30%": 0.63046875, "calibration/coverage@5%": 0.074609375, "calibration/ece": 0.11911352214410331, "calibration/mean_confidence": 0.46157543829664727, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 591.6, "completions/max_terminated_length": 591.6, "completions/mean_length": 244.7712890625, "completions/mean_terminated_length": 244.7712890625, "completions/min_length": 113.4, "completions/min_terminated_length": 113.4, "epoch": 0.752, "grad_norm": 0.0006762135890312493, "learning_rate": 1e-06, "loss": 0.0002, "num_tokens": 801733286.0, "reward": 1.010276234149933, "reward_std": 0.06789586842060089, "rewards/accuracy_reward": 0.55498046875, "rewards/brier_reward": 0.8036497592926025, "rewards/confidence_uniqueness_reward": 0.9525466918945312, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.0027418298181146384, "rewards/frontier_coverage_1": 0.1369162917137146, "rewards/frontier_coverage_10": 0.1369162917137146, "rewards/frontier_coverage_15": 0.12945626229047774, "rewards/frontier_coverage_20": 0.09780407398939132, "rewards/frontier_coverage_25": 0.06929152756929398, "rewards/frontier_coverage_5": 0.1369162917137146, "rewards/frontier_ece_reward": 0.005198706267401576, "signal/accuracy_reward/centered_abs_mean": 0.087127685546875, "signal/accuracy_reward/group_std_mean": 0.11871586441993713, "signal/accuracy_reward/group_zero_std_frac": 0.646875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0435638427734375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0435638427734375, "signal/advantage_abs_mean": 0.050392115116119386, "signal/advantage_pre_scale_abs_mean": 0.050392115116119386, "signal/advantage_pre_scale_std": 0.0956018552184105, "signal/advantage_std": 0.0956018552184105, "signal/brier_reward/centered_abs_mean": 0.10981836020946503, "signal/brier_reward/group_std_mean": 0.14380425959825516, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013727295026183129, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.013727295026183129, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.020843768119812013, "signal/confidence_uniqueness_reward/group_std_mean": 0.02636871188879013, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0026054710149765016, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0026054710149765016, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.002333183842711151, "signal/frontier_aurc_reward/group_std_mean": 0.00404226235114038, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.176398906565737e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.176398906565737e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.14656142741441727, "signal/frontier_coverage_1/group_std_mean": 0.19208039343357086, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026234494522213935, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026234494522213935, "signal/frontier_coverage_10/centered_abs_mean": 0.14656142741441727, "signal/frontier_coverage_10/group_std_mean": 0.19208039343357086, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026234494522213935, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026234494522213935, "signal/frontier_coverage_15/centered_abs_mean": 0.13771984726190567, "signal/frontier_coverage_15/group_std_mean": 0.18060652613639833, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002465185197070241, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002465185197070241, "signal/frontier_coverage_20/centered_abs_mean": 0.09731692224740982, "signal/frontier_coverage_20/group_std_mean": 0.12834441363811494, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001741972891613841, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001741972891613841, "signal/frontier_coverage_25/centered_abs_mean": 0.06323997154831887, "signal/frontier_coverage_25/group_std_mean": 0.08284124583005906, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011319954413920642, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011319954413920642, "signal/frontier_coverage_5/centered_abs_mean": 0.14656142741441727, "signal/frontier_coverage_5/group_std_mean": 0.19208039343357086, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026234494522213935, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026234494522213935, "signal/frontier_ece_reward/centered_abs_mean": 0.0052522880025207995, "signal/frontier_ece_reward/group_std_mean": 0.006774683482944965, "signal/frontier_ece_reward/group_zero_std_frac": 0.009375, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006565360003150999, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006565360003150999, "step": 235 }, { "calibration/aurc": 0.25391016797868754, "calibration/batch_distribution_entropy": 0.9626844496029806, "calibration/buffer_distribution_entropy": 0.9558027162029411, "calibration/confidence_entropy": 0.4521196567683129, "calibration/coverage@0%": 0.0632911876223092, "calibration/coverage@1%": 0.0707130626223092, "calibration/coverage@10%": 0.2926109955968689, "calibration/coverage@15%": 0.3672333659491194, "calibration/coverage@20%": 0.42779705846379645, "calibration/coverage@25%": 0.4707826259784736, "calibration/coverage@30%": 0.5833430161448141, "calibration/coverage@5%": 0.21447070694716244, "calibration/ece": 0.16563868062743387, "calibration/mean_confidence": 0.48634583285162664, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 1148.4, "completions/max_terminated_length": 619.6, "completions/mean_length": 250.61728515625, "completions/mean_terminated_length": 250.24109802246093, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 0.768, "grad_norm": 0.0008496443624608219, "learning_rate": 1e-06, "loss": 0.0013, "num_tokens": 819232311.0, "reward": 0.9929238677024841, "reward_std": 0.07314693182706833, "rewards/accuracy_reward": 0.51201171875, "rewards/brier_reward": 0.8128708124160766, "rewards/confidence_uniqueness_reward": 0.9537018656730651, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.0023616855032742023, "rewards/frontier_coverage_1": 0.16877596378326415, "rewards/frontier_coverage_10": 0.16877596378326415, "rewards/frontier_coverage_15": 0.16228313446044923, "rewards/frontier_coverage_20": 0.11836232990026474, "rewards/frontier_coverage_25": 0.08216822892427444, "rewards/frontier_coverage_5": 0.16877596378326415, "rewards/frontier_ece_reward": 0.0058203617110848425, "signal/accuracy_reward/centered_abs_mean": 0.091363525390625, "signal/accuracy_reward/group_std_mean": 0.12203695029020309, "signal/accuracy_reward/group_zero_std_frac": 0.65, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0456817626953125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0456817626953125, "signal/advantage_abs_mean": 0.05535215809941292, "signal/advantage_pre_scale_abs_mean": 0.05535215809941292, "signal/advantage_pre_scale_std": 0.10177362710237503, "signal/advantage_std": 0.10177362710237503, "signal/brier_reward/centered_abs_mean": 0.11626919358968735, "signal/brier_reward/group_std_mean": 0.14906791150569915, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014533649198710918, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.014533649198710918, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.020532801002264022, "signal/confidence_uniqueness_reward/group_std_mean": 0.027031725272536278, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0025666001252830028, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0025666001252830028, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.0019179133465513586, "signal/frontier_aurc_reward/group_std_mean": 0.0032503914553672075, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.4330648122704585e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.4330648122704585e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1560559540987015, "signal/frontier_coverage_1/group_std_mean": 0.20065269768238067, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0027934013400226832, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027934013400226832, "signal/frontier_coverage_10/centered_abs_mean": 0.1560559540987015, "signal/frontier_coverage_10/group_std_mean": 0.20065269768238067, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0027934013400226832, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0027934013400226832, "signal/frontier_coverage_15/centered_abs_mean": 0.14529191553592682, "signal/frontier_coverage_15/group_std_mean": 0.18685760498046874, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0026007251348346473, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026007251348346473, "signal/frontier_coverage_20/centered_abs_mean": 0.10000549256801605, "signal/frontier_coverage_20/group_std_mean": 0.12909981608390808, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0017900982638821006, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0017900982638821006, "signal/frontier_coverage_25/centered_abs_mean": 0.06673097908496857, "signal/frontier_coverage_25/group_std_mean": 0.08542303293943405, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011944844853132963, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011944844853132963, "signal/frontier_coverage_5/centered_abs_mean": 0.1560559540987015, "signal/frontier_coverage_5/group_std_mean": 0.20065269768238067, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0027934013400226832, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0027934013400226832, "signal/frontier_ece_reward/centered_abs_mean": 0.0050937430001795295, "signal/frontier_ece_reward/group_std_mean": 0.00648985980078578, "signal/frontier_ece_reward/group_zero_std_frac": 0.0125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006367178750224412, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006367178750224412, "step": 240 }, { "calibration/aurc": 0.33023066649008004, "calibration/batch_distribution_entropy": 0.9211474546608838, "calibration/buffer_distribution_entropy": 0.9545807950532765, "calibration/confidence_entropy": 0.4223679881144581, "calibration/coverage@0%": 0.00625, "calibration/coverage@1%": 0.00625, "calibration/coverage@10%": 0.14375, "calibration/coverage@15%": 0.23984375, "calibration/coverage@20%": 0.298046875, "calibration/coverage@25%": 0.373828125, "calibration/coverage@30%": 0.425, "calibration/coverage@5%": 0.109375, "calibration/ece": 0.16420909419043678, "calibration/mean_confidence": 0.5384653354060752, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 749.0, "completions/max_terminated_length": 749.0, "completions/mean_length": 252.1546875, "completions/mean_terminated_length": 252.1546875, "completions/min_length": 122.2, "completions/min_terminated_length": 122.2, "epoch": 0.784, "grad_norm": 0.0006861758301965892, "learning_rate": 1e-06, "loss": -0.0001, "num_tokens": 836988743.0, "reward": 1.0032026648521424, "reward_std": 0.07250990495085716, "rewards/accuracy_reward": 0.54912109375, "rewards/brier_reward": 0.7853811979293823, "rewards/confidence_uniqueness_reward": 0.9548965454101562, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.002936554979532957, "rewards/frontier_coverage_1": 0.115447399020195, "rewards/frontier_coverage_10": 0.11522519886493683, "rewards/frontier_coverage_15": 0.1076996922492981, "rewards/frontier_coverage_20": 0.08056422024965286, "rewards/frontier_coverage_25": 0.06024104133248329, "rewards/frontier_coverage_5": 0.115447399020195, "rewards/frontier_ece_reward": 0.004129563085734844, "signal/accuracy_reward/centered_abs_mean": 0.097381591796875, "signal/accuracy_reward/group_std_mean": 0.12832460254430772, "signal/accuracy_reward/group_zero_std_frac": 0.63125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0486907958984375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0486907958984375, "signal/advantage_abs_mean": 0.05545818880200386, "signal/advantage_pre_scale_abs_mean": 0.05545818880200386, "signal/advantage_pre_scale_std": 0.10165861696004867, "signal/advantage_std": 0.10165861696004867, "signal/brier_reward/centered_abs_mean": 0.11744992583990096, "signal/brier_reward/group_std_mean": 0.15015294551849365, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01468124072998762, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01468124072998762, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.019893622398376463, "signal/confidence_uniqueness_reward/group_std_mean": 0.025294922292232513, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002486702799797058, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002486702799797058, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0024152032332494856, "signal/frontier_aurc_reward/group_std_mean": 0.003975289314985276, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.323213652241975e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.323213652241975e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.15085219144821166, "signal/frontier_coverage_1/group_std_mean": 0.19441507160663604, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0027002541813999415, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027002541813999415, "signal/frontier_coverage_10/centered_abs_mean": 0.15072461664676667, "signal/frontier_coverage_10/group_std_mean": 0.1942601442337036, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026979705318808554, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026979705318808554, "signal/frontier_coverage_15/centered_abs_mean": 0.1394365519285202, "signal/frontier_coverage_15/group_std_mean": 0.1801184743642807, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002495914185419679, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002495914185419679, "signal/frontier_coverage_20/centered_abs_mean": 0.09248919636011124, "signal/frontier_coverage_20/group_std_mean": 0.1202873170375824, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0016555566107854247, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016555566107854247, "signal/frontier_coverage_25/centered_abs_mean": 0.06352094933390617, "signal/frontier_coverage_25/group_std_mean": 0.08175122737884521, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011370248859748245, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011370248859748245, "signal/frontier_coverage_5/centered_abs_mean": 0.15085219144821166, "signal/frontier_coverage_5/group_std_mean": 0.19441507160663604, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0027002541813999415, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0027002541813999415, "signal/frontier_ece_reward/centered_abs_mean": 0.005043382756412029, "signal/frontier_ece_reward/group_std_mean": 0.006396861933171749, "signal/frontier_ece_reward/group_zero_std_frac": 0.021875, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006304228445515037, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006304228445515037, "step": 245 }, { "calibration/aurc": 0.19597532611426857, "calibration/batch_distribution_entropy": 0.9391014856904241, "calibration/buffer_distribution_entropy": 0.9539163467672903, "calibration/confidence_entropy": 0.43941391694571175, "calibration/coverage@0%": 0.0609375, "calibration/coverage@1%": 0.0609375, "calibration/coverage@10%": 0.248828125, "calibration/coverage@15%": 0.488671875, "calibration/coverage@20%": 0.6046875, "calibration/coverage@25%": 0.673828125, "calibration/coverage@30%": 0.757421875, "calibration/coverage@5%": 0.13203125, "calibration/ece": 0.10801228966346155, "calibration/mean_confidence": 0.5153138521634614, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 736.8, "completions/max_terminated_length": 526.2, "completions/mean_length": 252.43125, "completions/mean_terminated_length": 252.30570373535156, "completions/min_length": 123.6, "completions/min_terminated_length": 123.6, "epoch": 0.8, "grad_norm": 0.0007989571313373744, "learning_rate": 1e-06, "loss": 0.0005, "num_tokens": 854584199.0, "reward": 1.0239898920059205, "reward_std": 0.06917952895164489, "rewards/accuracy_reward": 0.580078125, "rewards/brier_reward": 0.8201135277748108, "rewards/confidence_uniqueness_reward": 0.9562228918075562, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.0026165119837969542, "rewards/frontier_coverage_1": 0.12336181104183197, "rewards/frontier_coverage_10": 0.12248000055551529, "rewards/frontier_coverage_15": 0.11451495438814163, "rewards/frontier_coverage_20": 0.08619352877140045, "rewards/frontier_coverage_25": 0.06512454897165298, "rewards/frontier_coverage_5": 0.12336181104183197, "rewards/frontier_ece_reward": 0.005098512535914779, "signal/accuracy_reward/centered_abs_mean": 0.089453125, "signal/accuracy_reward/group_std_mean": 0.12056980878114701, "signal/accuracy_reward/group_zero_std_frac": 0.640625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0447265625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0447265625, "signal/advantage_abs_mean": 0.05227528065443039, "signal/advantage_pre_scale_abs_mean": 0.05227528065443039, "signal/advantage_pre_scale_std": 0.10132318586111069, "signal/advantage_std": 0.10132318586111069, "signal/brier_reward/centered_abs_mean": 0.10401596128940582, "signal/brier_reward/group_std_mean": 0.1339889034628868, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013001995161175728, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.013001995161175728, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.01882171183824539, "signal/confidence_uniqueness_reward/group_std_mean": 0.023961442708969116, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002352713979780674, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002352713979780674, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.0020810413639992475, "signal/frontier_aurc_reward/group_std_mean": 0.0032477714121341705, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.725063943420537e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.725063943420537e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1313213363289833, "signal/frontier_coverage_1/group_std_mean": 0.1710223823785782, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002350651752203703, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002350651752203703, "signal/frontier_coverage_10/centered_abs_mean": 0.1297929286956787, "signal/frontier_coverage_10/group_std_mean": 0.16902453005313872, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002323293359950185, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002323293359950185, "signal/frontier_coverage_15/centered_abs_mean": 0.11685995012521744, "signal/frontier_coverage_15/group_std_mean": 0.15256010591983796, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0020917929941788316, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0020917929941788316, "signal/frontier_coverage_20/centered_abs_mean": 0.08047119081020356, "signal/frontier_coverage_20/group_std_mean": 0.10576021820306777, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014404343208298087, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014404343208298087, "signal/frontier_coverage_25/centered_abs_mean": 0.054990262538194654, "signal/frontier_coverage_25/group_std_mean": 0.07126960307359695, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.000984325702302158, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.000984325702302158, "signal/frontier_coverage_5/centered_abs_mean": 0.1313213363289833, "signal/frontier_coverage_5/group_std_mean": 0.1710223823785782, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002350651752203703, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002350651752203703, "signal/frontier_ece_reward/centered_abs_mean": 0.004696264863014221, "signal/frontier_ece_reward/group_std_mean": 0.005948188435286284, "signal/frontier_ece_reward/group_zero_std_frac": 0.009375, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005870331078767776, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005870331078767776, "step": 250 }, { "epoch": 0.8, "eval_calibration/aurc": 0.4142328683970304, "eval_calibration/batch_distribution_entropy": 0.8993784848821733, "eval_calibration/buffer_distribution_entropy": 0.9542638871252693, "eval_calibration/confidence_entropy": 0.4208433244117349, "eval_calibration/coverage@0%": 0.078125, "eval_calibration/coverage@1%": 0.078125, "eval_calibration/coverage@10%": 0.078125, "eval_calibration/coverage@15%": 0.1015625, "eval_calibration/coverage@20%": 0.203125, "eval_calibration/coverage@25%": 0.296875, "eval_calibration/coverage@30%": 0.3125, "eval_calibration/coverage@5%": 0.078125, "eval_calibration/ece": 0.1930786248852066, "eval_calibration/mean_confidence": 0.4963598748852066, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 476.0, "eval_completions/max_terminated_length": 476.0, "eval_completions/mean_length": 255.57765197753906, "eval_completions/mean_terminated_length": 255.57765197753906, "eval_completions/min_length": 135.75, "eval_completions/min_terminated_length": 135.75, "eval_loss": 0.0, "eval_num_tokens": 854584199.0, "eval_reward": 0.9532016068696976, "eval_reward_std": 0.23495937138795853, "eval_rewards/accuracy_reward": 0.4453125, "eval_rewards/brier_reward": 0.7989254742860794, "eval_rewards/confidence_uniqueness_reward": 0.901123046875, "eval_rewards/format_reward": 1.0, "eval_rewards/frontier_aurc_reward": -0.0034314218210056424, "eval_rewards/frontier_coverage_1": 0.1996590532362461, "eval_rewards/frontier_coverage_10": 0.19675859063863754, "eval_rewards/frontier_coverage_15": 0.1782199591398239, "eval_rewards/frontier_coverage_20": 0.12300213798880577, "eval_rewards/frontier_coverage_25": 0.07886525429785252, "eval_rewards/frontier_coverage_5": 0.1996590532362461, "eval_rewards/frontier_ece_reward": 0.005018939729779959, "eval_runtime": 23.4926, "eval_samples_per_second": 21.283, "eval_signal/accuracy_reward/centered_abs_mean": 0.4755859375, "eval_signal/accuracy_reward/group_std_mean": 0.4951448142528534, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23779296875, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23779296875, "eval_signal/advantage_abs_mean": 0.21880921721458435, "eval_signal/advantage_pre_scale_abs_mean": 0.21880921721458435, "eval_signal/advantage_pre_scale_std": 0.2323761023581028, "eval_signal/advantage_std": 0.2323761023581028, "eval_signal/brier_reward/centered_abs_mean": 0.20660649985074997, "eval_signal/brier_reward/group_std_mean": 0.25988300889730453, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.025825812481343746, "eval_signal/brier_reward/weight": 0.125, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.025825812481343746, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0436553955078125, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.05071157868951559, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0054569244384765625, "eval_signal/confidence_uniqueness_reward/weight": 0.125, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0054569244384765625, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.004325759713537991, "eval_signal/frontier_aurc_reward/group_std_mean": 0.008416089694947004, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 7.743109745206311e-05, "eval_signal/frontier_aurc_reward/weight": 0.017899999395012856, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 7.743109745206311e-05, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.3520267680287361, "eval_signal/frontier_coverage_1/group_std_mean": 0.43477170169353485, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.006301278946921229, "eval_signal/frontier_coverage_1/weight": 0.017899999395012856, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.006301278946921229, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.34786005318164825, "eval_signal/frontier_coverage_10/group_std_mean": 0.42980340868234634, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00622669467702508, "eval_signal/frontier_coverage_10/weight": 0.017899999395012856, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00622669467702508, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.3130974769592285, "eval_signal/frontier_coverage_15/group_std_mean": 0.38755345344543457, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0056044444208964705, "eval_signal/frontier_coverage_15/weight": 0.017899999395012856, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0056044444208964705, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.20560914278030396, "eval_signal/frontier_coverage_20/group_std_mean": 0.26048335433006287, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003680403344333172, "eval_signal/frontier_coverage_20/weight": 0.017899999395012856, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003680403344333172, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.11794870160520077, "eval_signal/frontier_coverage_25/group_std_mean": 0.15224769711494446, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002111281646648422, "eval_signal/frontier_coverage_25/weight": 0.017899999395012856, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002111281646648422, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.3520267680287361, "eval_signal/frontier_coverage_5/group_std_mean": 0.43477170169353485, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.006301278946921229, "eval_signal/frontier_coverage_5/weight": 0.017899999395012856, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.006301278946921229, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.007750884513370693, "eval_signal/frontier_ece_reward/group_std_mean": 0.00966967479325831, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009688605641713366, "eval_signal/frontier_ece_reward/weight": 0.125, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009688605641713366, "eval_steps_per_second": 0.17, "step": 250 }, { "calibration/aurc": 0.22341894121568937, "calibration/batch_distribution_entropy": 0.8868936837741197, "calibration/buffer_distribution_entropy": 0.9534030407270027, "calibration/confidence_entropy": 0.40355038966953816, "calibration/coverage@0%": 0.0078125, "calibration/coverage@1%": 0.0078125, "calibration/coverage@10%": 0.158203125, "calibration/coverage@15%": 0.26796875, "calibration/coverage@20%": 0.391015625, "calibration/coverage@25%": 0.720703125, "calibration/coverage@30%": 0.805859375, "calibration/coverage@5%": 0.109375, "calibration/ece": 0.13161625000000002, "calibration/mean_confidence": 0.567055625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 572.6, "completions/max_terminated_length": 572.6, "completions/mean_length": 250.9498046875, "completions/mean_terminated_length": 250.9498046875, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 0.816, "grad_norm": 0.0008308417163789272, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 872253093.0, "reward": 1.0168833494186402, "reward_std": 0.07324363887310029, "rewards/accuracy_reward": 0.58017578125, "rewards/brier_reward": 0.7875685572624207, "rewards/confidence_uniqueness_reward": 0.9539688110351563, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.0030643716920167206, "rewards/frontier_coverage_1": 0.09161564782261848, "rewards/frontier_coverage_10": 0.09113903939723969, "rewards/frontier_coverage_15": 0.08651385009288788, "rewards/frontier_coverage_20": 0.06792352050542831, "rewards/frontier_coverage_25": 0.054780172556638716, "rewards/frontier_coverage_5": 0.09161564782261848, "rewards/frontier_ece_reward": 0.0040153548121452335, "signal/accuracy_reward/centered_abs_mean": 0.096136474609375, "signal/accuracy_reward/group_std_mean": 0.12627903670072554, "signal/accuracy_reward/group_zero_std_frac": 0.646875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0480682373046875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0480682373046875, "signal/advantage_abs_mean": 0.05611480250954628, "signal/advantage_pre_scale_abs_mean": 0.05611480250954628, "signal/advantage_pre_scale_std": 0.10440016239881515, "signal/advantage_std": 0.10440016239881515, "signal/brier_reward/centered_abs_mean": 0.1229382187128067, "signal/brier_reward/group_std_mean": 0.15722771883010864, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015367277339100838, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.015367277339100838, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.021144795417785644, "signal/confidence_uniqueness_reward/group_std_mean": 0.027067623659968378, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0026430994272232055, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0026430994272232055, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.00280195998493582, "signal/frontier_aurc_reward/group_std_mean": 0.004848680645227432, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.015508249925915e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.015508249925915e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.14679521322250366, "signal/frontier_coverage_1/group_std_mean": 0.1901654928922653, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026276342570781706, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026276342570781706, "signal/frontier_coverage_10/centered_abs_mean": 0.1450937107205391, "signal/frontier_coverage_10/group_std_mean": 0.1880299925804138, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0025971772614866496, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025971772614866496, "signal/frontier_coverage_15/centered_abs_mean": 0.1285434916615486, "signal/frontier_coverage_15/group_std_mean": 0.16707846224308015, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002300928346812725, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002300928346812725, "signal/frontier_coverage_20/centered_abs_mean": 0.0890080213546753, "signal/frontier_coverage_20/group_std_mean": 0.11617460995912551, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0015932435402646662, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015932435402646662, "signal/frontier_coverage_25/centered_abs_mean": 0.06208330765366554, "signal/frontier_coverage_25/group_std_mean": 0.07998019456863403, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011112912092357875, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011112912092357875, "signal/frontier_coverage_5/centered_abs_mean": 0.14679521322250366, "signal/frontier_coverage_5/group_std_mean": 0.1901654928922653, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026276342570781706, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026276342570781706, "signal/frontier_ece_reward/centered_abs_mean": 0.004793836083263159, "signal/frontier_ece_reward/group_std_mean": 0.006085961498320103, "signal/frontier_ece_reward/group_zero_std_frac": 0.0125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005992295104078948, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005992295104078948, "step": 255 }, { "calibration/aurc": 0.2786088570549718, "calibration/batch_distribution_entropy": 0.938188176116842, "calibration/buffer_distribution_entropy": 0.9518384077690992, "calibration/confidence_entropy": 0.43254885404867444, "calibration/coverage@0%": 0.037890625, "calibration/coverage@1%": 0.037890625, "calibration/coverage@10%": 0.238671875, "calibration/coverage@15%": 0.308203125, "calibration/coverage@20%": 0.358984375, "calibration/coverage@25%": 0.414453125, "calibration/coverage@30%": 0.515234375, "calibration/coverage@5%": 0.17109375, "calibration/ece": 0.11687695312500002, "calibration/mean_confidence": 0.506845703125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 742.0, "completions/max_terminated_length": 536.0, "completions/mean_length": 252.67412109375, "completions/mean_terminated_length": 252.54882202148437, "completions/min_length": 115.6, "completions/min_terminated_length": 115.6, "epoch": 0.832, "grad_norm": 0.0008032754994928837, "learning_rate": 1e-06, "loss": 0.0005, "num_tokens": 889848828.0, "reward": 1.0108869075775146, "reward_std": 0.0699038602411747, "rewards/accuracy_reward": 0.550390625, "rewards/brier_reward": 0.8212600588798523, "rewards/confidence_uniqueness_reward": 0.9529539108276367, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.002241781633347273, "rewards/frontier_coverage_1": 0.14855314195156097, "rewards/frontier_coverage_10": 0.14673392921686174, "rewards/frontier_coverage_15": 0.12972736209630967, "rewards/frontier_coverage_20": 0.09893926084041596, "rewards/frontier_coverage_25": 0.07374034821987152, "rewards/frontier_coverage_5": 0.14855314195156097, "rewards/frontier_ece_reward": 0.005167901515960693, "signal/accuracy_reward/centered_abs_mean": 0.0928466796875, "signal/accuracy_reward/group_std_mean": 0.12302683144807816, "signal/accuracy_reward/group_zero_std_frac": 0.65, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04642333984375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04642333984375, "signal/advantage_abs_mean": 0.053195972740650174, "signal/advantage_pre_scale_abs_mean": 0.053195972740650174, "signal/advantage_pre_scale_std": 0.10306639075279236, "signal/advantage_std": 0.10306639075279236, "signal/brier_reward/centered_abs_mean": 0.10160237550735474, "signal/brier_reward/group_std_mean": 0.13150315284729003, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012700296938419342, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.012700296938419342, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02115917094051838, "signal/confidence_uniqueness_reward/group_std_mean": 0.02714742161333561, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0026448963675647975, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0026448963675647975, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.0018434126162901522, "signal/frontier_aurc_reward/group_std_mean": 0.0030642326921224592, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.299708623671904e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.299708623671904e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.13409744948148727, "signal/frontier_coverage_1/group_std_mean": 0.17334451973438264, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002400344191119075, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002400344191119075, "signal/frontier_coverage_10/centered_abs_mean": 0.13227225542068483, "signal/frontier_coverage_10/group_std_mean": 0.1710406243801117, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023676733020693065, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023676733020693065, "signal/frontier_coverage_15/centered_abs_mean": 0.11501559019088745, "signal/frontier_coverage_15/group_std_mean": 0.1489970475435257, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002058779005892575, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002058779005892575, "signal/frontier_coverage_20/centered_abs_mean": 0.08325291574001312, "signal/frontier_coverage_20/group_std_mean": 0.10779815167188644, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014902271097525955, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014902271097525955, "signal/frontier_coverage_25/centered_abs_mean": 0.0572903573513031, "signal/frontier_coverage_25/group_std_mean": 0.07322717756032944, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010254973545670508, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010254973545670508, "signal/frontier_coverage_5/centered_abs_mean": 0.13409744948148727, "signal/frontier_coverage_5/group_std_mean": 0.17334451973438264, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002400344191119075, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002400344191119075, "signal/frontier_ece_reward/centered_abs_mean": 0.004283274430781603, "signal/frontier_ece_reward/group_std_mean": 0.005466235801577568, "signal/frontier_ece_reward/group_zero_std_frac": 0.015625, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005354093038477004, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005354093038477004, "step": 260 }, { "calibration/aurc": 0.31810909498840123, "calibration/batch_distribution_entropy": 0.9300493723481166, "calibration/buffer_distribution_entropy": 0.9525486135022445, "calibration/confidence_entropy": 0.4521314722308912, "calibration/coverage@0%": 0.021484375, "calibration/coverage@1%": 0.021484375, "calibration/coverage@10%": 0.18203125, "calibration/coverage@15%": 0.271484375, "calibration/coverage@20%": 0.4078125, "calibration/coverage@25%": 0.471484375, "calibration/coverage@30%": 0.544921875, "calibration/coverage@5%": 0.1015625, "calibration/ece": 0.13904648437499995, "calibration/mean_confidence": 0.572958203125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 669.0, "completions/max_terminated_length": 669.0, "completions/mean_length": 250.8658203125, "completions/mean_terminated_length": 250.8658203125, "completions/min_length": 126.6, "completions/min_terminated_length": 126.6, "epoch": 0.848, "grad_norm": 0.000901456514839083, "learning_rate": 1e-06, "loss": 0.0001, "num_tokens": 907432062.0, "reward": 0.9951120018959045, "reward_std": 0.06787320524454117, "rewards/accuracy_reward": 0.5255859375, "rewards/brier_reward": 0.7996316909790039, "rewards/confidence_uniqueness_reward": 0.9561546325683594, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.0026320197619497778, "rewards/frontier_coverage_1": 0.13928882628679276, "rewards/frontier_coverage_10": 0.1381850004196167, "rewards/frontier_coverage_15": 0.11995747685432434, "rewards/frontier_coverage_20": 0.08737777099013329, "rewards/frontier_coverage_25": 0.06390020698308944, "rewards/frontier_coverage_5": 0.13928882628679276, "rewards/frontier_ece_reward": 0.004621562361717224, "signal/accuracy_reward/centered_abs_mean": 0.08026123046875, "signal/accuracy_reward/group_std_mean": 0.10938422381877899, "signal/accuracy_reward/group_zero_std_frac": 0.675, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.040130615234375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.040130615234375, "signal/advantage_abs_mean": 0.0514536626636982, "signal/advantage_pre_scale_abs_mean": 0.0514536626636982, "signal/advantage_pre_scale_std": 0.09874935895204544, "signal/advantage_std": 0.09874935895204544, "signal/brier_reward/centered_abs_mean": 0.10689050555229188, "signal/brier_reward/group_std_mean": 0.13864734917879104, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013361313194036484, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.013361313194036484, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.019451355934143065, "signal/confidence_uniqueness_reward/group_std_mean": 0.024612750858068466, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002431419491767883, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002431419491767883, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.002004986209794879, "signal/frontier_aurc_reward/group_std_mean": 0.003225843422114849, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.588925173971802e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.588925173971802e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.13043643683195114, "signal/frontier_coverage_1/group_std_mean": 0.1731602132320404, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023348120506852866, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023348120506852866, "signal/frontier_coverage_10/centered_abs_mean": 0.12900474667549133, "signal/frontier_coverage_10/group_std_mean": 0.17132607698440552, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023091848473995925, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023091848473995925, "signal/frontier_coverage_15/centered_abs_mean": 0.11192914545536041, "signal/frontier_coverage_15/group_std_mean": 0.1488051563501358, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002003531623631716, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002003531623631716, "signal/frontier_coverage_20/centered_abs_mean": 0.0816087007522583, "signal/frontier_coverage_20/group_std_mean": 0.10843551754951478, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014607956632971763, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014607956632971763, "signal/frontier_coverage_25/centered_abs_mean": 0.05602134019136429, "signal/frontier_coverage_25/group_std_mean": 0.07327790409326554, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010027819662354887, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010027819662354887, "signal/frontier_coverage_5/centered_abs_mean": 0.13043643683195114, "signal/frontier_coverage_5/group_std_mean": 0.1731602132320404, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023348120506852866, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023348120506852866, "signal/frontier_ece_reward/centered_abs_mean": 0.00407783156260848, "signal/frontier_ece_reward/group_std_mean": 0.005348461586982012, "signal/frontier_ece_reward/group_zero_std_frac": 0.00625, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00050972894532606, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00050972894532606, "step": 265 }, { "calibration/aurc": 0.271196127415333, "calibration/batch_distribution_entropy": 0.9267513408952057, "calibration/buffer_distribution_entropy": 0.953885675345784, "calibration/confidence_entropy": 0.4640439064202364, "calibration/coverage@0%": 0.001953125, "calibration/coverage@1%": 0.001953125, "calibration/coverage@10%": 0.14609375, "calibration/coverage@15%": 0.1921875, "calibration/coverage@20%": 0.3625, "calibration/coverage@25%": 0.43515625, "calibration/coverage@30%": 0.49765625, "calibration/coverage@5%": 0.069921875, "calibration/ece": 0.14137528043337522, "calibration/mean_confidence": 0.618042606722683, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000390625, "completions/max_length": 1146.8, "completions/max_terminated_length": 691.6, "completions/mean_length": 253.50556640625, "completions/mean_terminated_length": 253.00428466796876, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 0.864, "grad_norm": 0.0008237656438723207, "learning_rate": 1e-06, "loss": 0.0009, "num_tokens": 925014775.0, "reward": 1.0212122201919556, "reward_std": 0.07169701382517815, "rewards/accuracy_reward": 0.58408203125, "rewards/brier_reward": 0.8013546705245972, "rewards/confidence_uniqueness_reward": 0.95478835105896, "rewards/format_reward": 0.999609375, "rewards/frontier_aurc_reward": -0.0022963778115808963, "rewards/frontier_coverage_1": 0.10232354998588562, "rewards/frontier_coverage_10": 0.1015251636505127, "rewards/frontier_coverage_15": 0.09124607294797897, "rewards/frontier_coverage_20": 0.07141445800662041, "rewards/frontier_coverage_25": 0.056721173226833344, "rewards/frontier_coverage_5": 0.10232354998588562, "rewards/frontier_ece_reward": 0.003858886519446969, "signal/accuracy_reward/centered_abs_mean": 0.092730712890625, "signal/accuracy_reward/group_std_mean": 0.12438704073429108, "signal/accuracy_reward/group_zero_std_frac": 0.64375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0463653564453125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0463653564453125, "signal/advantage_abs_mean": 0.05372531041502952, "signal/advantage_pre_scale_abs_mean": 0.05372531041502952, "signal/advantage_pre_scale_std": 0.10223406553268433, "signal/advantage_std": 0.10223406553268433, "signal/brier_reward/centered_abs_mean": 0.11028100401163102, "signal/brier_reward/group_std_mean": 0.14314747452735901, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013785125501453877, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.013785125501453877, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.019714291021227837, "signal/confidence_uniqueness_reward/group_std_mean": 0.025689712166786192, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0024642863776534797, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0024642863776534797, "signal/format_reward/centered_abs_mean": 0.00074462890625, "signal/format_reward/group_std_mean": 0.0018734002020210027, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000372314453125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000372314453125, "signal/frontier_aurc_reward/centered_abs_mean": 0.0019337810343131423, "signal/frontier_aurc_reward/group_std_mean": 0.0032589809503406285, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.461468186287675e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.461468186287675e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.14040221869945527, "signal/frontier_coverage_1/group_std_mean": 0.18402603268623352, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0025131995789706707, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025131995789706707, "signal/frontier_coverage_10/centered_abs_mean": 0.139146026968956, "signal/frontier_coverage_10/group_std_mean": 0.18235517740249635, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024907137267291546, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024907137267291546, "signal/frontier_coverage_15/centered_abs_mean": 0.11923650801181793, "signal/frontier_coverage_15/group_std_mean": 0.15632621049880982, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0021343334345147015, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0021343334345147015, "signal/frontier_coverage_20/centered_abs_mean": 0.08577980250120162, "signal/frontier_coverage_20/group_std_mean": 0.11253109723329544, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001535458443686366, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001535458443686366, "signal/frontier_coverage_25/centered_abs_mean": 0.05949989929795265, "signal/frontier_coverage_25/group_std_mean": 0.07727274596691132, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010650481563061476, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010650481563061476, "signal/frontier_coverage_5/centered_abs_mean": 0.14040221869945527, "signal/frontier_coverage_5/group_std_mean": 0.18402603268623352, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025131995789706707, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025131995789706707, "signal/frontier_ece_reward/centered_abs_mean": 0.004180350759997964, "signal/frontier_ece_reward/group_std_mean": 0.0054055553860962394, "signal/frontier_ece_reward/group_zero_std_frac": 0.003125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005225438449997455, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005225438449997455, "step": 270 }, { "calibration/aurc": 0.3662187271340662, "calibration/batch_distribution_entropy": 0.9424121541193221, "calibration/buffer_distribution_entropy": 0.9544058121376885, "calibration/confidence_entropy": 0.4228504678202111, "calibration/coverage@0%": 0.008984375, "calibration/coverage@1%": 0.008984375, "calibration/coverage@10%": 0.045703125, "calibration/coverage@15%": 0.075390625, "calibration/coverage@20%": 0.130859375, "calibration/coverage@25%": 0.232421875, "calibration/coverage@30%": 0.31875, "calibration/coverage@5%": 0.0234375, "calibration/ece": 0.15656218074772288, "calibration/mean_confidence": 0.5140074932477229, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 560.2, "completions/max_terminated_length": 560.2, "completions/mean_length": 251.0052734375, "completions/mean_terminated_length": 251.0052734375, "completions/min_length": 121.8, "completions/min_terminated_length": 121.8, "epoch": 0.88, "grad_norm": 0.0007305578328669071, "learning_rate": 1e-06, "loss": 0.0001, "num_tokens": 942732141.0, "reward": 0.9916547417640686, "reward_std": 0.06835338175296783, "rewards/accuracy_reward": 0.518359375, "rewards/brier_reward": 0.7975268721580505, "rewards/confidence_uniqueness_reward": 0.9533485412597656, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.002605132572352886, "rewards/frontier_coverage_1": 0.1489662915468216, "rewards/frontier_coverage_10": 0.14885261952877044, "rewards/frontier_coverage_15": 0.12518833130598067, "rewards/frontier_coverage_20": 0.09161524027585984, "rewards/frontier_coverage_25": 0.06737890988588333, "rewards/frontier_coverage_5": 0.1489662915468216, "rewards/frontier_ece_reward": 0.004623535228893161, "signal/accuracy_reward/centered_abs_mean": 0.0923828125, "signal/accuracy_reward/group_std_mean": 0.11860855221748352, "signal/accuracy_reward/group_zero_std_frac": 0.675, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04619140625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04619140625, "signal/advantage_abs_mean": 0.05325910523533821, "signal/advantage_pre_scale_abs_mean": 0.05325910523533821, "signal/advantage_pre_scale_std": 0.09945199489593506, "signal/advantage_std": 0.09945199489593506, "signal/brier_reward/centered_abs_mean": 0.11102102249860764, "signal/brier_reward/group_std_mean": 0.14243731796741485, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013877627812325955, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.013877627812325955, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.020394396781921387, "signal/confidence_uniqueness_reward/group_std_mean": 0.025670462101697922, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0025492995977401734, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0025492995977401734, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0020469398470595477, "signal/frontier_aurc_reward/group_std_mean": 0.0033199348486959933, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.664021860458888e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.664021860458888e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.15008485019207002, "signal/frontier_coverage_1/group_std_mean": 0.19261721670627593, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026865187101066113, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026865187101066113, "signal/frontier_coverage_10/centered_abs_mean": 0.14985155463218688, "signal/frontier_coverage_10/group_std_mean": 0.19233030080795288, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002682342706248164, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002682342706248164, "signal/frontier_coverage_15/centered_abs_mean": 0.12626577615737916, "signal/frontier_coverage_15/group_std_mean": 0.16239021718502045, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002260157372802496, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002260157372802496, "signal/frontier_coverage_20/centered_abs_mean": 0.09228641092777252, "signal/frontier_coverage_20/group_std_mean": 0.11892776638269424, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0016519267112016678, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016519267112016678, "signal/frontier_coverage_25/centered_abs_mean": 0.062046286463737485, "signal/frontier_coverage_25/group_std_mean": 0.07945701777935028, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011106284568086267, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011106284568086267, "signal/frontier_coverage_5/centered_abs_mean": 0.15008485019207002, "signal/frontier_coverage_5/group_std_mean": 0.19261721670627593, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026865187101066113, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026865187101066113, "signal/frontier_ece_reward/centered_abs_mean": 0.004392636381089688, "signal/frontier_ece_reward/group_std_mean": 0.0055966474115848545, "signal/frontier_ece_reward/group_zero_std_frac": 0.00625, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.000549079547636211, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.000549079547636211, "step": 275 }, { "calibration/aurc": 0.37704128995461206, "calibration/batch_distribution_entropy": 0.9144648554628553, "calibration/buffer_distribution_entropy": 0.9541376919449341, "calibration/confidence_entropy": 0.4211639999263033, "calibration/coverage@0%": 0.009765625, "calibration/coverage@1%": 0.009765625, "calibration/coverage@10%": 0.076171875, "calibration/coverage@15%": 0.139453125, "calibration/coverage@20%": 0.18125, "calibration/coverage@25%": 0.23515625, "calibration/coverage@30%": 0.42109375, "calibration/coverage@5%": 0.04140625, "calibration/ece": 0.16247158693895747, "calibration/mean_confidence": 0.5423721630610425, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 660.4, "completions/max_terminated_length": 660.4, "completions/mean_length": 249.89990234375, "completions/mean_terminated_length": 249.89990234375, "completions/min_length": 127.6, "completions/min_terminated_length": 127.6, "epoch": 0.896, "grad_norm": 0.0009292135946452618, "learning_rate": 1e-06, "loss": 0.0007, "num_tokens": 960401964.0, "reward": 1.0014619946479797, "reward_std": 0.06392379850149155, "rewards/accuracy_reward": 0.53779296875, "rewards/brier_reward": 0.8009364008903503, "rewards/confidence_uniqueness_reward": 0.9536941528320313, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.0029405945912003516, "rewards/frontier_coverage_1": 0.1434343695640564, "rewards/frontier_coverage_10": 0.14290477931499482, "rewards/frontier_coverage_15": 0.12304576188325882, "rewards/frontier_coverage_20": 0.09274870157241821, "rewards/frontier_coverage_25": 0.0677462287247181, "rewards/frontier_coverage_5": 0.1434343695640564, "rewards/frontier_ece_reward": 0.004168036207556724, "signal/accuracy_reward/centered_abs_mean": 0.084637451171875, "signal/accuracy_reward/group_std_mean": 0.11610205471515656, "signal/accuracy_reward/group_zero_std_frac": 0.65, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0423187255859375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0423187255859375, "signal/advantage_abs_mean": 0.047319182008504865, "signal/advantage_pre_scale_abs_mean": 0.047319182008504865, "signal/advantage_pre_scale_std": 0.09262912273406983, "signal/advantage_std": 0.09262912273406983, "signal/brier_reward/centered_abs_mean": 0.10917495787143708, "signal/brier_reward/group_std_mean": 0.1393113523721695, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013646869733929635, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.013646869733929635, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.019398140907287597, "signal/confidence_uniqueness_reward/group_std_mean": 0.02443733625113964, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0024247676134109496, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0024247676134109496, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0022043085657060145, "signal/frontier_aurc_reward/group_std_mean": 0.0035537141375243664, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.945712269342039e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.945712269342039e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1438123404979706, "signal/frontier_coverage_1/group_std_mean": 0.18452912867069243, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002574240742251277, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002574240742251277, "signal/frontier_coverage_10/centered_abs_mean": 0.1431819975376129, "signal/frontier_coverage_10/group_std_mean": 0.1837300330400467, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002562957629561424, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002562957629561424, "signal/frontier_coverage_15/centered_abs_mean": 0.12040194422006607, "signal/frontier_coverage_15/group_std_mean": 0.15485568046569825, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0021551947575062513, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0021551947575062513, "signal/frontier_coverage_20/centered_abs_mean": 0.08914662450551987, "signal/frontier_coverage_20/group_std_mean": 0.11499444544315338, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0015957244904711843, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015957244904711843, "signal/frontier_coverage_25/centered_abs_mean": 0.060103370994329455, "signal/frontier_coverage_25/group_std_mean": 0.07683221846818925, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010758502641692758, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010758502641692758, "signal/frontier_coverage_5/centered_abs_mean": 0.1438123404979706, "signal/frontier_coverage_5/group_std_mean": 0.18452912867069243, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002574240742251277, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002574240742251277, "signal/frontier_ece_reward/centered_abs_mean": 0.004060229659080506, "signal/frontier_ece_reward/group_std_mean": 0.005199447367340326, "signal/frontier_ece_reward/group_zero_std_frac": 0.0125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005075287073850632, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005075287073850632, "step": 280 }, { "calibration/aurc": 0.36122752635898076, "calibration/batch_distribution_entropy": 0.946771030985343, "calibration/buffer_distribution_entropy": 0.9530327177612656, "calibration/confidence_entropy": 0.44074749209441855, "calibration/coverage@0%": 0.030124080882352945, "calibration/coverage@1%": 0.030124080882352945, "calibration/coverage@10%": 0.11731311274509804, "calibration/coverage@15%": 0.20526654411764705, "calibration/coverage@20%": 0.31317248774509804, "calibration/coverage@25%": 0.37570925245098036, "calibration/coverage@30%": 0.46482536764705884, "calibration/coverage@5%": 0.05438419117647059, "calibration/ece": 0.14812835716976971, "calibration/mean_confidence": 0.48950283305724307, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 782.2, "completions/max_terminated_length": 605.4, "completions/mean_length": 249.14404296875, "completions/mean_terminated_length": 248.89211730957032, "completions/min_length": 122.2, "completions/min_terminated_length": 122.2, "epoch": 0.912, "grad_norm": 0.0009797021048143506, "learning_rate": 1e-06, "loss": 0.0007, "num_tokens": 978004495.0, "reward": 1.0036668181419373, "reward_std": 0.06746198162436486, "rewards/accuracy_reward": 0.5416015625, "rewards/brier_reward": 0.8046979784965516, "rewards/confidence_uniqueness_reward": 0.9551046133041382, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.002108183712698519, "rewards/frontier_coverage_1": 0.13762879073619844, "rewards/frontier_coverage_10": 0.13762879073619844, "rewards/frontier_coverage_15": 0.12159725055098533, "rewards/frontier_coverage_20": 0.09403416961431503, "rewards/frontier_coverage_25": 0.0696952298283577, "rewards/frontier_coverage_5": 0.13762879073619844, "rewards/frontier_ece_reward": 0.004224732192233205, "signal/accuracy_reward/centered_abs_mean": 0.08880615234375, "signal/accuracy_reward/group_std_mean": 0.11999956220388412, "signal/accuracy_reward/group_zero_std_frac": 0.646875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.044403076171875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.044403076171875, "signal/advantage_abs_mean": 0.05088106840848923, "signal/advantage_pre_scale_abs_mean": 0.05088106840848923, "signal/advantage_pre_scale_std": 0.09543706178665161, "signal/advantage_std": 0.09543706178665161, "signal/brier_reward/centered_abs_mean": 0.1156904086470604, "signal/brier_reward/group_std_mean": 0.14857376515865325, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01446130108088255, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01446130108088255, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.01911727674305439, "signal/confidence_uniqueness_reward/group_std_mean": 0.02458658292889595, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002389659592881799, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002389659592881799, "signal/format_reward/centered_abs_mean": 0.0003662109375, "signal/format_reward/group_std_mean": 0.000768545875325799, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00018310546875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00018310546875, "signal/frontier_aurc_reward/centered_abs_mean": 0.0016015514265745878, "signal/frontier_aurc_reward/group_std_mean": 0.0026247325353324414, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.8667769583989866e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.8667769583989866e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.15861463844776152, "signal/frontier_coverage_1/group_std_mean": 0.20468551516532899, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0028392020147293808, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0028392020147293808, "signal/frontier_coverage_10/centered_abs_mean": 0.15861463844776152, "signal/frontier_coverage_10/group_std_mean": 0.20468551516532899, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0028392020147293808, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0028392020147293808, "signal/frontier_coverage_15/centered_abs_mean": 0.13538606017827987, "signal/frontier_coverage_15/group_std_mean": 0.17446688711643218, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002423410303890705, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002423410303890705, "signal/frontier_coverage_20/centered_abs_mean": 0.09944360852241516, "signal/frontier_coverage_20/group_std_mean": 0.12812657803297042, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0017800404457375407, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0017800404457375407, "signal/frontier_coverage_25/centered_abs_mean": 0.06665683835744858, "signal/frontier_coverage_25/group_std_mean": 0.08519981652498246, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001193157327361405, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001193157327361405, "signal/frontier_coverage_5/centered_abs_mean": 0.15861463844776152, "signal/frontier_coverage_5/group_std_mean": 0.20468551516532899, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0028392020147293808, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0028392020147293808, "signal/frontier_ece_reward/centered_abs_mean": 0.004177849320694804, "signal/frontier_ece_reward/group_std_mean": 0.005369494389742613, "signal/frontier_ece_reward/group_zero_std_frac": 0.0125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005222311650868505, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005222311650868505, "step": 285 }, { "calibration/aurc": 0.37730223006107255, "calibration/batch_distribution_entropy": 0.9507460509748444, "calibration/buffer_distribution_entropy": 0.9543901529907087, "calibration/confidence_entropy": 0.45204149685146594, "calibration/coverage@0%": 0.011721813725490197, "calibration/coverage@1%": 0.011721813725490197, "calibration/coverage@10%": 0.026174938725490194, "calibration/coverage@15%": 0.057815563725490196, "calibration/coverage@20%": 0.1596936274509804, "calibration/coverage@25%": 0.24418198529411766, "calibration/coverage@30%": 0.3165104166666667, "calibration/coverage@5%": 0.011721813725490197, "calibration/ece": 0.12675503829656865, "calibration/mean_confidence": 0.4814181510416667, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 788.8, "completions/max_terminated_length": 663.4, "completions/mean_length": 240.012890625, "completions/mean_terminated_length": 239.75939025878907, "completions/min_length": 121.2, "completions/min_terminated_length": 121.2, "epoch": 0.928, "grad_norm": 0.0005975121166557074, "learning_rate": 1e-06, "loss": 0.0006, "num_tokens": 995489043.0, "reward": 0.9937048196792603, "reward_std": 0.06352801769971847, "rewards/accuracy_reward": 0.52548828125, "rewards/brier_reward": 0.7934018492698669, "rewards/confidence_uniqueness_reward": 0.9475328207015992, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.0023260547081008554, "rewards/frontier_coverage_1": 0.14467951208353041, "rewards/frontier_coverage_10": 0.14467951208353041, "rewards/frontier_coverage_15": 0.12346935272216797, "rewards/frontier_coverage_20": 0.09609992057085037, "rewards/frontier_coverage_25": 0.07109370082616806, "rewards/frontier_coverage_5": 0.14467951208353041, "rewards/frontier_ece_reward": 0.00408800826407969, "signal/accuracy_reward/centered_abs_mean": 0.078668212890625, "signal/accuracy_reward/group_std_mean": 0.11027546375989913, "signal/accuracy_reward/group_zero_std_frac": 0.66875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0393341064453125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0393341064453125, "signal/advantage_abs_mean": 0.04622294753789902, "signal/advantage_pre_scale_abs_mean": 0.04622294753789902, "signal/advantage_pre_scale_std": 0.09103738218545913, "signal/advantage_std": 0.09103738218545913, "signal/brier_reward/centered_abs_mean": 0.10818531513214111, "signal/brier_reward/group_std_mean": 0.1421953484416008, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013523164391517638, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.013523164391517638, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02333177290856838, "signal/confidence_uniqueness_reward/group_std_mean": 0.029623343050479888, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0029164716135710476, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0029164716135710476, "signal/format_reward/centered_abs_mean": 0.0003662109375, "signal/format_reward/group_std_mean": 0.000768545875325799, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00018310546875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00018310546875, "signal/frontier_aurc_reward/centered_abs_mean": 0.0017985031008720398, "signal/frontier_aurc_reward/group_std_mean": 0.002986391820013523, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.219320460630115e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.219320460630115e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1464044988155365, "signal/frontier_coverage_1/group_std_mean": 0.19371420741081238, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002620640443637967, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002620640443637967, "signal/frontier_coverage_10/centered_abs_mean": 0.1464044988155365, "signal/frontier_coverage_10/group_std_mean": 0.19371420741081238, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002620640443637967, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002620640443637967, "signal/frontier_coverage_15/centered_abs_mean": 0.12273292541503907, "signal/frontier_coverage_15/group_std_mean": 0.1628485679626465, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002196919359266758, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002196919359266758, "signal/frontier_coverage_20/centered_abs_mean": 0.09006080776453018, "signal/frontier_coverage_20/group_std_mean": 0.1197909340262413, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0016120884567499161, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016120884567499161, "signal/frontier_coverage_25/centered_abs_mean": 0.06147329062223435, "signal/frontier_coverage_25/group_std_mean": 0.08052114397287369, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011003718711435795, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011003718711435795, "signal/frontier_coverage_5/centered_abs_mean": 0.1464044988155365, "signal/frontier_coverage_5/group_std_mean": 0.19371420741081238, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002620640443637967, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002620640443637967, "signal/frontier_ece_reward/centered_abs_mean": 0.004189421329647303, "signal/frontier_ece_reward/group_std_mean": 0.005466786120086909, "signal/frontier_ece_reward/group_zero_std_frac": 0.01875, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005236776662059129, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005236776662059129, "step": 290 }, { "calibration/aurc": 0.2359112787034589, "calibration/batch_distribution_entropy": 0.9577303840353146, "calibration/buffer_distribution_entropy": 0.955857059591185, "calibration/confidence_entropy": 0.4517232801906215, "calibration/coverage@0%": 0.067578125, "calibration/coverage@1%": 0.08125, "calibration/coverage@10%": 0.25234375, "calibration/coverage@15%": 0.3578125, "calibration/coverage@20%": 0.443359375, "calibration/coverage@25%": 0.533203125, "calibration/coverage@30%": 0.626953125, "calibration/coverage@5%": 0.165625, "calibration/ece": 0.11354453125, "calibration/mean_confidence": 0.47161171875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 481.6, "completions/max_terminated_length": 481.6, "completions/mean_length": 233.83076171875, "completions/mean_terminated_length": 233.83076171875, "completions/min_length": 116.8, "completions/min_terminated_length": 116.8, "epoch": 0.944, "grad_norm": 0.0009692126768641174, "learning_rate": 1e-06, "loss": 0.0007, "num_tokens": 1012858894.0, "reward": 0.999471652507782, "reward_std": 0.0703015498816967, "rewards/accuracy_reward": 0.53798828125, "rewards/brier_reward": 0.7908595085144043, "rewards/confidence_uniqueness_reward": 0.9465713500976562, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.002295452752150595, "rewards/frontier_coverage_1": 0.1418714702129364, "rewards/frontier_coverage_10": 0.1418314516544342, "rewards/frontier_coverage_15": 0.12424526810646057, "rewards/frontier_coverage_20": 0.0976836234331131, "rewards/frontier_coverage_25": 0.06969998776912689, "rewards/frontier_coverage_5": 0.1418714702129364, "rewards/frontier_ece_reward": 0.00401430269703269, "signal/accuracy_reward/centered_abs_mean": 0.104327392578125, "signal/accuracy_reward/group_std_mean": 0.13720910102128983, "signal/accuracy_reward/group_zero_std_frac": 0.6125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0521636962890625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0521636962890625, "signal/advantage_abs_mean": 0.053348977118730545, "signal/advantage_pre_scale_abs_mean": 0.053348977118730545, "signal/advantage_pre_scale_std": 0.10006897747516633, "signal/advantage_std": 0.10006897747516633, "signal/brier_reward/centered_abs_mean": 0.10836757719516754, "signal/brier_reward/group_std_mean": 0.1396089732646942, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013545947149395943, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.013545947149395943, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.023391056060791015, "signal/confidence_uniqueness_reward/group_std_mean": 0.02954912818968296, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002923882007598877, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002923882007598877, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0015608920832164586, "signal/frontier_aurc_reward/group_std_mean": 0.0024038115050643684, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.79399668215774e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.79399668215774e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.15688484013080597, "signal/frontier_coverage_1/group_std_mean": 0.20480249226093292, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0028082385659217836, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0028082385659217836, "signal/frontier_coverage_10/centered_abs_mean": 0.15675508975982666, "signal/frontier_coverage_10/group_std_mean": 0.2046307384967804, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0028059160336852073, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0028059160336852073, "signal/frontier_coverage_15/centered_abs_mean": 0.1311119645833969, "signal/frontier_coverage_15/group_std_mean": 0.1709737718105316, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023469041101634503, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023469041101634503, "signal/frontier_coverage_20/centered_abs_mean": 0.0950249582529068, "signal/frontier_coverage_20/group_std_mean": 0.12448285669088363, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0017009467585012317, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0017009467585012317, "signal/frontier_coverage_25/centered_abs_mean": 0.0624612458050251, "signal/frontier_coverage_25/group_std_mean": 0.0814983144402504, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011180563131347298, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011180563131347298, "signal/frontier_coverage_5/centered_abs_mean": 0.15688484013080597, "signal/frontier_coverage_5/group_std_mean": 0.20480249226093292, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0028082385659217836, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0028082385659217836, "signal/frontier_ece_reward/centered_abs_mean": 0.00407049129717052, "signal/frontier_ece_reward/group_std_mean": 0.005348560772836209, "signal/frontier_ece_reward/group_zero_std_frac": 0.0125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.000508811412146315, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.000508811412146315, "step": 295 }, { "calibration/aurc": 0.3216802000684701, "calibration/batch_distribution_entropy": 0.9327274087606267, "calibration/buffer_distribution_entropy": 0.9563913914075005, "calibration/confidence_entropy": 0.4118774304341909, "calibration/coverage@0%": 0.00546875, "calibration/coverage@1%": 0.00546875, "calibration/coverage@10%": 0.064453125, "calibration/coverage@15%": 0.231640625, "calibration/coverage@20%": 0.306640625, "calibration/coverage@25%": 0.4015625, "calibration/coverage@30%": 0.4984375, "calibration/coverage@5%": 0.03671875, "calibration/ece": 0.14537527941429626, "calibration/mean_confidence": 0.5074075330857037, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 513.8, "completions/max_terminated_length": 513.8, "completions/mean_length": 228.8435546875, "completions/mean_terminated_length": 228.8435546875, "completions/min_length": 114.4, "completions/min_terminated_length": 114.4, "epoch": 0.96, "grad_norm": 0.0006835410604253411, "learning_rate": 1e-06, "loss": 0.0003, "num_tokens": 1030142572.0, "reward": 0.9945238113403321, "reward_std": 0.05708309561014176, "rewards/accuracy_reward": 0.51865234375, "rewards/brier_reward": 0.8066446900367736, "rewards/confidence_uniqueness_reward": 0.94853515625, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.0027884629555046557, "rewards/frontier_coverage_1": 0.1710768908262253, "rewards/frontier_coverage_10": 0.1710768908262253, "rewards/frontier_coverage_15": 0.14798834621906282, "rewards/frontier_coverage_20": 0.1122938945889473, "rewards/frontier_coverage_25": 0.0784688264131546, "rewards/frontier_coverage_5": 0.1710768908262253, "rewards/frontier_ece_reward": 0.004796722158789635, "signal/accuracy_reward/centered_abs_mean": 0.074957275390625, "signal/accuracy_reward/group_std_mean": 0.10602360963821411, "signal/accuracy_reward/group_zero_std_frac": 0.665625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0374786376953125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0374786376953125, "signal/advantage_abs_mean": 0.04150651097297668, "signal/advantage_pre_scale_abs_mean": 0.04150651097297668, "signal/advantage_pre_scale_std": 0.08337944746017456, "signal/advantage_std": 0.08337944746017456, "signal/brier_reward/centered_abs_mean": 0.10154019445180892, "signal/brier_reward/group_std_mean": 0.1345837637782097, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012692524306476115, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.012692524306476115, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.022342228889465333, "signal/confidence_uniqueness_reward/group_std_mean": 0.028145313262939453, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0027927786111831667, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0027927786111831667, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0021448221756145357, "signal/frontier_aurc_reward/group_std_mean": 0.0035167032852768897, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.839231430902146e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.839231430902146e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1430927574634552, "signal/frontier_coverage_1/group_std_mean": 0.19070055186748505, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0025613602716475724, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025613602716475724, "signal/frontier_coverage_10/centered_abs_mean": 0.1430927574634552, "signal/frontier_coverage_10/group_std_mean": 0.19070055186748505, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0025613602716475724, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025613602716475724, "signal/frontier_coverage_15/centered_abs_mean": 0.12214766442775726, "signal/frontier_coverage_15/group_std_mean": 0.16296298503875734, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0021864432375878094, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0021864432375878094, "signal/frontier_coverage_20/centered_abs_mean": 0.08906677216291428, "signal/frontier_coverage_20/group_std_mean": 0.11882689893245697, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0015942952129989862, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015942952129989862, "signal/frontier_coverage_25/centered_abs_mean": 0.06060428842902184, "signal/frontier_coverage_25/group_std_mean": 0.07976671904325486, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010848167585209012, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010848167585209012, "signal/frontier_coverage_5/centered_abs_mean": 0.1430927574634552, "signal/frontier_coverage_5/group_std_mean": 0.19070055186748505, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025613602716475724, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025613602716475724, "signal/frontier_ece_reward/centered_abs_mean": 0.004108147229999304, "signal/frontier_ece_reward/group_std_mean": 0.00542615270242095, "signal/frontier_ece_reward/group_zero_std_frac": 0.01875, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.000513518403749913, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.000513518403749913, "step": 300 }, { "epoch": 0.96, "eval_calibration/aurc": 0.502782782980951, "eval_calibration/batch_distribution_entropy": 0.9078093664399773, "eval_calibration/buffer_distribution_entropy": 0.9551198154788769, "eval_calibration/confidence_entropy": 0.43107506961147557, "eval_calibration/coverage@0%": 0.0703125, "eval_calibration/coverage@1%": 0.0703125, "eval_calibration/coverage@10%": 0.0703125, "eval_calibration/coverage@15%": 0.09375, "eval_calibration/coverage@20%": 0.1015625, "eval_calibration/coverage@25%": 0.140625, "eval_calibration/coverage@30%": 0.1796875, "eval_calibration/coverage@5%": 0.0703125, "eval_calibration/ece": 0.20484375, "eval_calibration/mean_confidence": 0.45484375, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 356.75, "eval_completions/max_terminated_length": 356.75, "eval_completions/mean_length": 227.28704071044922, "eval_completions/mean_terminated_length": 227.28704071044922, "eval_completions/min_length": 122.0, "eval_completions/min_terminated_length": 122.0, "eval_loss": 0.0, "eval_num_tokens": 1030142572.0, "eval_reward": 0.9478924721479416, "eval_reward_std": 0.22710193321108818, "eval_rewards/accuracy_reward": 0.4296875, "eval_rewards/brier_reward": 0.803790807723999, "eval_rewards/confidence_uniqueness_reward": 0.894287109375, "eval_rewards/format_reward": 1.0, "eval_rewards/frontier_aurc_reward": -0.0038025410613045096, "eval_rewards/frontier_coverage_1": 0.23142481595277786, "eval_rewards/frontier_coverage_10": 0.2294025495648384, "eval_rewards/frontier_coverage_15": 0.19725340977311134, "eval_rewards/frontier_coverage_20": 0.14543926157057285, "eval_rewards/frontier_coverage_25": 0.09397028014063835, "eval_rewards/frontier_coverage_5": 0.23142481595277786, "eval_rewards/frontier_ece_reward": 0.005195607780478895, "eval_runtime": 19.8919, "eval_samples_per_second": 25.136, "eval_signal/accuracy_reward/centered_abs_mean": 0.46728515625, "eval_signal/accuracy_reward/group_std_mean": 0.490493468940258, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.233642578125, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.233642578125, "eval_signal/advantage_abs_mean": 0.20863738283514977, "eval_signal/advantage_pre_scale_abs_mean": 0.20863738283514977, "eval_signal/advantage_pre_scale_std": 0.22465674951672554, "eval_signal/advantage_std": 0.22465674951672554, "eval_signal/brier_reward/centered_abs_mean": 0.2109249383211136, "eval_signal/brier_reward/group_std_mean": 0.264465369284153, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0263656172901392, "eval_signal/brier_reward/weight": 0.125, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.0263656172901392, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0487518310546875, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.06017216946929693, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0060939788818359375, "eval_signal/confidence_uniqueness_reward/weight": 0.125, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0060939788818359375, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.005303551617544144, "eval_signal/frontier_aurc_reward/group_std_mean": 0.011191037716343999, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 9.493357356404886e-05, "eval_signal/frontier_aurc_reward/weight": 0.017899999395012856, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 9.493357356404886e-05, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.394241102039814, "eval_signal/frontier_coverage_1/group_std_mean": 0.47531820833683014, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.007056915084831417, "eval_signal/frontier_coverage_1/weight": 0.017899999395012856, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.007056915084831417, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.39137494564056396, "eval_signal/frontier_coverage_10/group_std_mean": 0.47202398627996445, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00700561108533293, "eval_signal/frontier_coverage_10/weight": 0.017899999395012856, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00700561108533293, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.3299722671508789, "eval_signal/frontier_coverage_15/group_std_mean": 0.400464303791523, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005906503647565842, "eval_signal/frontier_coverage_15/weight": 0.017899999395012856, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005906503647565842, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.22924000769853592, "eval_signal/frontier_coverage_20/group_std_mean": 0.2813456952571869, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004103396320715547, "eval_signal/frontier_coverage_20/weight": 0.017899999395012856, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004103396320715547, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.12824426405131817, "eval_signal/frontier_coverage_25/group_std_mean": 0.16049236804246902, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0022955723688937724, "eval_signal/frontier_coverage_25/weight": 0.017899999395012856, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022955723688937724, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.394241102039814, "eval_signal/frontier_coverage_5/group_std_mean": 0.47531820833683014, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.007056915084831417, "eval_signal/frontier_coverage_5/weight": 0.017899999395012856, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.007056915084831417, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.008114422438666224, "eval_signal/frontier_ece_reward/group_std_mean": 0.009916237089782953, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001014302804833278, "eval_signal/frontier_ece_reward/weight": 0.125, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001014302804833278, "eval_steps_per_second": 0.201, "step": 300 }, { "calibration/aurc": 0.25771066147986865, "calibration/batch_distribution_entropy": 0.9374598895353655, "calibration/buffer_distribution_entropy": 0.9549208391660825, "calibration/confidence_entropy": 0.44294141392762426, "calibration/coverage@0%": 0.03359375, "calibration/coverage@1%": 0.03359375, "calibration/coverage@10%": 0.28828125, "calibration/coverage@15%": 0.4203125, "calibration/coverage@20%": 0.476171875, "calibration/coverage@25%": 0.54296875, "calibration/coverage@30%": 0.59921875, "calibration/coverage@5%": 0.06640625, "calibration/ece": 0.14654739762244245, "calibration/mean_confidence": 0.5065541648775576, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 718.4, "completions/max_terminated_length": 519.8, "completions/mean_length": 226.2775390625, "completions/mean_terminated_length": 226.14942321777343, "completions/min_length": 109.4, "completions/min_terminated_length": 109.4, "epoch": 0.976, "grad_norm": 0.0014678977895528078, "learning_rate": 1e-06, "loss": 0.0002, "num_tokens": 1047320774.0, "reward": 1.007277262210846, "reward_std": 0.06227183118462563, "rewards/accuracy_reward": 0.54677734375, "rewards/brier_reward": 0.8078455209732056, "rewards/confidence_uniqueness_reward": 0.9517561435699463, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.002812092285603285, "rewards/frontier_coverage_1": 0.1481065958738327, "rewards/frontier_coverage_10": 0.14778946116566657, "rewards/frontier_coverage_15": 0.13327017948031425, "rewards/frontier_coverage_20": 0.10284108966588974, "rewards/frontier_coverage_25": 0.07239802479743958, "rewards/frontier_coverage_5": 0.1481065958738327, "rewards/frontier_ece_reward": 0.004540855251252651, "signal/accuracy_reward/centered_abs_mean": 0.085162353515625, "signal/accuracy_reward/group_std_mean": 0.1166534885764122, "signal/accuracy_reward/group_zero_std_frac": 0.653125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0425811767578125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0425811767578125, "signal/advantage_abs_mean": 0.045932318270206454, "signal/advantage_pre_scale_abs_mean": 0.045932318270206454, "signal/advantage_pre_scale_std": 0.08938146680593491, "signal/advantage_std": 0.08938146680593491, "signal/brier_reward/centered_abs_mean": 0.1009139209985733, "signal/brier_reward/group_std_mean": 0.13128523528575897, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012614240124821662, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.012614240124821662, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.020480955392122267, "signal/confidence_uniqueness_reward/group_std_mean": 0.02599894180893898, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0025601194240152834, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0025601194240152834, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.002267425088211894, "signal/frontier_aurc_reward/group_std_mean": 0.003777716076001525, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.058690792589914e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.058690792589914e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1478489577770233, "signal/frontier_coverage_1/group_std_mean": 0.19217921793460846, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026464962400496008, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026464962400496008, "signal/frontier_coverage_10/centered_abs_mean": 0.1464183211326599, "signal/frontier_coverage_10/group_std_mean": 0.19032938480377198, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002620887756347656, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002620887756347656, "signal/frontier_coverage_15/centered_abs_mean": 0.1232375368475914, "signal/frontier_coverage_15/group_std_mean": 0.16054988354444505, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002205951721407473, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002205951721407473, "signal/frontier_coverage_20/centered_abs_mean": 0.08783506155014038, "signal/frontier_coverage_20/group_std_mean": 0.11469702571630477, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0015722476178780197, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015722476178780197, "signal/frontier_coverage_25/centered_abs_mean": 0.05822276622056961, "signal/frontier_coverage_25/group_std_mean": 0.07532109916210175, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010421874932944775, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010421874932944775, "signal/frontier_coverage_5/centered_abs_mean": 0.1478489577770233, "signal/frontier_coverage_5/group_std_mean": 0.19217921793460846, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026464962400496008, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026464962400496008, "signal/frontier_ece_reward/centered_abs_mean": 0.0042947923298925165, "signal/frontier_ece_reward/group_std_mean": 0.005507392250001431, "signal/frontier_ece_reward/group_zero_std_frac": 0.021875, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005368490412365646, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005368490412365646, "step": 305 }, { "calibration/aurc": 0.36195064149212575, "calibration/batch_distribution_entropy": 0.9291159530510076, "calibration/buffer_distribution_entropy": 0.9555400002138736, "calibration/confidence_entropy": 0.4221106847869228, "calibration/coverage@0%": 0.007814031862745098, "calibration/coverage@1%": 0.007814031862745098, "calibration/coverage@10%": 0.058985906862745095, "calibration/coverage@15%": 0.0882827818627451, "calibration/coverage@20%": 0.1601577818627451, "calibration/coverage@25%": 0.32345894607843134, "calibration/coverage@30%": 0.4137530637254902, "calibration/coverage@5%": 0.0328140318627451, "calibration/ece": 0.15175038551879086, "calibration/mean_confidence": 0.4563323503880719, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 833.2, "completions/max_terminated_length": 634.0, "completions/mean_length": 221.01396484375, "completions/mean_terminated_length": 220.75893859863282, "completions/min_length": 109.8, "completions/min_terminated_length": 109.8, "epoch": 0.992, "grad_norm": 0.0008075599907897413, "learning_rate": 1e-06, "loss": 0.0008, "num_tokens": 1064712437.0, "reward": 0.9926512241363525, "reward_std": 0.05921575650572777, "rewards/accuracy_reward": 0.51943359375, "rewards/brier_reward": 0.8002906322479248, "rewards/confidence_uniqueness_reward": 0.9479501247406006, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.0027773221023380756, "rewards/frontier_coverage_1": 0.15749771595001222, "rewards/frontier_coverage_10": 0.15701024532318114, "rewards/frontier_coverage_15": 0.13756523728370668, "rewards/frontier_coverage_20": 0.10082580447196961, "rewards/frontier_coverage_25": 0.07283035963773728, "rewards/frontier_coverage_5": 0.15749771595001222, "rewards/frontier_ece_reward": 0.004255708307027817, "signal/accuracy_reward/centered_abs_mean": 0.079058837890625, "signal/accuracy_reward/group_std_mean": 0.10680015832185745, "signal/accuracy_reward/group_zero_std_frac": 0.690625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0395294189453125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0395294189453125, "signal/advantage_abs_mean": 0.043850655853748324, "signal/advantage_pre_scale_abs_mean": 0.043850655853748324, "signal/advantage_pre_scale_std": 0.08708179742097855, "signal/advantage_std": 0.08708179742097855, "signal/brier_reward/centered_abs_mean": 0.1025318220257759, "signal/brier_reward/group_std_mean": 0.13148369193077086, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012816477753221988, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.012816477753221988, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02215721495449543, "signal/confidence_uniqueness_reward/group_std_mean": 0.02793830633163452, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0027696518693119286, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0027696518693119286, "signal/format_reward/centered_abs_mean": 0.0003662109375, "signal/format_reward/group_std_mean": 0.000768545875325799, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00018310546875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00018310546875, "signal/frontier_aurc_reward/centered_abs_mean": 0.0021893641911447047, "signal/frontier_aurc_reward/group_std_mean": 0.0037097081542015074, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.9189616654766726e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.9189616654766726e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.14424746930599214, "signal/frontier_coverage_1/group_std_mean": 0.184622061252594, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002582029718905687, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002582029718905687, "signal/frontier_coverage_10/centered_abs_mean": 0.14393795430660247, "signal/frontier_coverage_10/group_std_mean": 0.18423262238502502, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0025764893274754287, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025764893274754287, "signal/frontier_coverage_15/centered_abs_mean": 0.12318403720855713, "signal/frontier_coverage_15/group_std_mean": 0.15762063413858413, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002204994112253189, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002204994112253189, "signal/frontier_coverage_20/centered_abs_mean": 0.08359026908874512, "signal/frontier_coverage_20/group_std_mean": 0.10689203143119812, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014962658053264022, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014962658053264022, "signal/frontier_coverage_25/centered_abs_mean": 0.057026924937963484, "signal/frontier_coverage_25/group_std_mean": 0.07260482162237167, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010207819053903223, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010207819053903223, "signal/frontier_coverage_5/centered_abs_mean": 0.14424746930599214, "signal/frontier_coverage_5/group_std_mean": 0.184622061252594, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002582029718905687, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002582029718905687, "signal/frontier_ece_reward/centered_abs_mean": 0.004057144792750478, "signal/frontier_ece_reward/group_std_mean": 0.005179398506879806, "signal/frontier_ece_reward/group_zero_std_frac": 0.021875, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005071430990938097, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005071430990938097, "step": 310 }, { "calibration/aurc": 0.2747143001846701, "calibration/batch_distribution_entropy": 0.8806812013835696, "calibration/buffer_distribution_entropy": 0.9564232842329621, "calibration/confidence_entropy": 0.4171977146312076, "calibration/coverage@0%": 0.017578125, "calibration/coverage@1%": 0.017578125, "calibration/coverage@10%": 0.095703125, "calibration/coverage@15%": 0.1357421875, "calibration/coverage@20%": 0.158203125, "calibration/coverage@25%": 0.5341796875, "calibration/coverage@30%": 0.6318359375, "calibration/coverage@5%": 0.0537109375, "calibration/ece": 0.17599609375000003, "calibration/mean_confidence": 0.6320703125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000244140625, "completions/max_length": 995.0, "completions/max_terminated_length": 803.5, "completions/mean_length": 220.93310546875, "completions/mean_terminated_length": 220.61163330078125, "completions/min_length": 98.5, "completions/min_terminated_length": 98.5, "epoch": 0.9984, "num_tokens": 1071619903.0, "reward": 0.9982321858406067, "reward_std": 0.06220795214176178, "rewards/accuracy_reward": 0.54638671875, "rewards/brier_reward": 0.7730526924133301, "rewards/confidence_uniqueness_reward": 0.954784631729126, "rewards/format_reward": 0.999755859375, "rewards/frontier_aurc_reward": -0.0029595731757581234, "rewards/frontier_coverage_1": 0.10051992163062096, "rewards/frontier_coverage_10": 0.10056523606181145, "rewards/frontier_coverage_15": 0.07943737879395485, "rewards/frontier_coverage_20": 0.06294701993465424, "rewards/frontier_coverage_25": 0.04942700266838074, "rewards/frontier_coverage_5": 0.10051992163062096, "rewards/frontier_ece_reward": 0.003216548007912934, "signal/accuracy_reward/centered_abs_mean": 0.073822021484375, "signal/accuracy_reward/group_std_mean": 0.10335757955908775, "signal/accuracy_reward/group_zero_std_frac": 0.6796875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0369110107421875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0369110107421875, "signal/advantage_abs_mean": 0.046172238886356354, "signal/advantage_pre_scale_abs_mean": 0.046172238886356354, "signal/advantage_pre_scale_std": 0.09168939664959908, "signal/advantage_std": 0.09168939664959908, "signal/brier_reward/centered_abs_mean": 0.10888796299695969, "signal/brier_reward/group_std_mean": 0.1396150141954422, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01361099537461996, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01361099537461996, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.019547984935343266, "signal/confidence_uniqueness_reward/group_std_mean": 0.024895640090107918, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002443498116917908, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002443498116917908, "signal/format_reward/centered_abs_mean": 0.0004730224609375, "signal/format_reward/group_std_mean": 0.0013810679083690047, "signal/format_reward/group_zero_std_frac": 0.9921875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00023651123046875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00023651123046875, "signal/frontier_aurc_reward/centered_abs_mean": 0.0022028597304597497, "signal/frontier_aurc_reward/group_std_mean": 0.00342005817219615, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.943118827010039e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.943118827010039e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.134110227227211, "signal/frontier_coverage_1/group_std_mean": 0.17590243369340897, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024005728773772717, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024005728773772717, "signal/frontier_coverage_10/centered_abs_mean": 0.13338283449411392, "signal/frontier_coverage_10/group_std_mean": 0.1749531328678131, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023875526385381818, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023875526385381818, "signal/frontier_coverage_15/centered_abs_mean": 0.114329993724823, "signal/frontier_coverage_15/group_std_mean": 0.14955615997314453, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0020465069683268666, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0020465069683268666, "signal/frontier_coverage_20/centered_abs_mean": 0.07507089525461197, "signal/frontier_coverage_20/group_std_mean": 0.09895683825016022, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001343769021332264, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001343769021332264, "signal/frontier_coverage_25/centered_abs_mean": 0.051243193447589874, "signal/frontier_coverage_25/group_std_mean": 0.06748097017407417, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009172531717922539, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009172531717922539, "signal/frontier_coverage_5/centered_abs_mean": 0.134110227227211, "signal/frontier_coverage_5/group_std_mean": 0.17590243369340897, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024005728773772717, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024005728773772717, "signal/frontier_ece_reward/centered_abs_mean": 0.00416590110398829, "signal/frontier_ece_reward/group_std_mean": 0.005485004745423794, "signal/frontier_ece_reward/group_zero_std_frac": 0.015625, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005207376379985362, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005207376379985362, "step": 312, "total_flos": 0.0, "train_loss": 0.004686371226200255, "train_runtime": 60660.1417, "train_samples_per_second": 0.33, "train_steps_per_second": 0.005 } ], "logging_steps": 5, "max_steps": 312, "num_input_tokens_seen": 1071619903, "num_train_epochs": 1, "save_steps": 60, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }