{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.49919376007799904, "eval_steps": 50, "global_step": 208, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "calibration/aurc": 0.5119607631252925, "calibration/batch_distribution_entropy": 0.2767451001971738, "calibration/confidence_entropy": 0.22007440379474952, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.47048212152499086, "calibration/mean_confidence": 0.9157685071277196, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.018576388888888885, "completions/max_length": 4042.4, "completions/max_terminated_length": 4042.4, "completions/mean_length": 517.6447082519531, "completions/mean_terminated_length": 527.4365112304688, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.011999850001874977, "grad_norm": 0.005689694546163082, "learning_rate": 5.952380952380953e-07, "loss": 0.0074, "num_tokens": 9077475.0, "reward": 0.6640230894088746, "reward_std": 0.6719910860061645, "rewards/accuracy_reward": 0.26406249701976775, "rewards/brier_reward": 0.31690160036087034, "rewards/confidence_uniqueness_reward": 0.2950827181339264, "rewards/format_reward": 0.6027777671813965, "rewards/frontier_aurc_reward": 0.2791689395904541, "rewards/frontier_ece_reward": 0.2791689395904541, "rewards/frontier_entropy_batch_reward": -0.5742027401924134, "rewards/volume_coverage_0": 0.2791689395904541, "rewards/volume_coverage_1": 0.2791689395904541, "rewards/volume_coverage_10": 0.2791689395904541, "rewards/volume_coverage_15": 0.2791689395904541, "rewards/volume_coverage_20": 0.2791689395904541, "rewards/volume_coverage_25": 0.2791689395904541, "rewards/volume_coverage_5": 0.2791689395904541, "signal/accuracy_reward/centered_abs_mean": 0.31510416865348817, "signal/accuracy_reward/group_std_mean": 0.374676376581192, "signal/accuracy_reward/group_zero_std_frac": 0.08055555745959282, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.15755208432674409, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.15755208432674409, "signal/advantage_abs_mean": 0.5728595376014709, "signal/advantage_pre_scale_abs_mean": 0.5728595376014709, "signal/advantage_pre_scale_std": 0.6879928708076477, "signal/advantage_std": 0.6879928708076477, "signal/brier_reward/centered_abs_mean": 0.3253703832626343, "signal/brier_reward/group_std_mean": 0.3777146339416504, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03253703787922859, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.03253703787922859, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.23825904428958894, "signal/confidence_uniqueness_reward/group_std_mean": 0.2890412747859955, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.023825905472040176, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.023825905472040176, "signal/format_reward/centered_abs_mean": 0.44292533993721006, "signal/format_reward/group_std_mean": 0.47658112049102785, "signal/format_reward/group_zero_std_frac": 0.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.22146266996860503, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.22146266996860503, "signal/frontier_aurc_reward/centered_abs_mean": 0.31583258509635925, "signal/frontier_aurc_reward/group_std_mean": 0.3733566999435425, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.003947907360270619, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.003947907360270619, "signal/frontier_ece_reward/centered_abs_mean": 0.31583258509635925, "signal/frontier_ece_reward/group_std_mean": 0.3733566999435425, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.031583258882164955, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.031583258882164955, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.45222132802009585, "signal/frontier_entropy_batch_reward/group_std_mean": 0.483779114484787, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.04522213339805603, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.04522213339805603, "signal/volume_coverage_0/centered_abs_mean": 0.31583258509635925, "signal/volume_coverage_0/group_std_mean": 0.3733566999435425, "signal/volume_coverage_0/group_zero_std_frac": 0.0, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 0.031583258882164955, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 0.031583258882164955, "signal/volume_coverage_1/centered_abs_mean": 0.31583258509635925, "signal/volume_coverage_1/group_std_mean": 0.3733566999435425, "signal/volume_coverage_1/group_zero_std_frac": 0.0, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 0.031583258882164955, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 0.031583258882164955, "signal/volume_coverage_10/centered_abs_mean": 0.31583258509635925, "signal/volume_coverage_10/group_std_mean": 0.3733566999435425, "signal/volume_coverage_10/group_zero_std_frac": 0.0, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 0.031583258882164955, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 0.031583258882164955, "signal/volume_coverage_15/centered_abs_mean": 0.31583258509635925, "signal/volume_coverage_15/group_std_mean": 0.3733566999435425, "signal/volume_coverage_15/group_zero_std_frac": 0.0, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 0.031583258882164955, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 0.031583258882164955, "signal/volume_coverage_20/centered_abs_mean": 0.31583258509635925, "signal/volume_coverage_20/group_std_mean": 0.3733566999435425, "signal/volume_coverage_20/group_zero_std_frac": 0.0, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 0.031583258882164955, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 0.031583258882164955, "signal/volume_coverage_25/centered_abs_mean": 0.31583258509635925, "signal/volume_coverage_25/group_std_mean": 0.3733566999435425, "signal/volume_coverage_25/group_zero_std_frac": 0.0, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.031583258882164955, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 0.031583258882164955, "signal/volume_coverage_5/centered_abs_mean": 0.31583258509635925, "signal/volume_coverage_5/group_std_mean": 0.3733566999435425, "signal/volume_coverage_5/group_zero_std_frac": 0.0, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 0.031583258882164955, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 0.031583258882164955, "step": 5 }, { "calibration/aurc": 0.5121039943055846, "calibration/batch_distribution_entropy": 0.2556783321589634, "calibration/confidence_entropy": 0.22334692393582234, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.46985558555759993, "calibration/mean_confidence": 0.9213281837751548, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016840277777777767, "completions/max_length": 3861.8, "completions/max_terminated_length": 3861.8, "completions/mean_length": 478.6218017578125, "completions/mean_terminated_length": 487.04514770507814, "completions/min_length": 0.0, "completions/min_terminated_length": 14.2, "epoch": 0.023999700003749954, "grad_norm": 0.005293714813888073, "learning_rate": 1.1904761904761906e-06, "loss": 0.0012, "num_tokens": 17673918.0, "reward": 0.7621858239173889, "reward_std": 0.6452823042869568, "rewards/accuracy_reward": 0.29513888955116274, "rewards/brier_reward": 0.35790597200393676, "rewards/confidence_uniqueness_reward": 0.3535000741481781, "rewards/format_reward": 0.7201388716697693, "rewards/frontier_aurc_reward": 0.3103605091571808, "rewards/frontier_ece_reward": 0.3103605091571808, "rewards/frontier_entropy_batch_reward": -0.6876158952713013, "rewards/volume_coverage_0": 0.3103605091571808, "rewards/volume_coverage_1": 0.3103605091571808, "rewards/volume_coverage_10": 0.3103605091571808, "rewards/volume_coverage_15": 0.3103605091571808, "rewards/volume_coverage_20": 0.3103605091571808, "rewards/volume_coverage_25": 0.3103605091571808, "rewards/volume_coverage_5": 0.3103605091571808, "signal/accuracy_reward/centered_abs_mean": 0.324945741891861, "signal/accuracy_reward/group_std_mean": 0.38479640483856203, "signal/accuracy_reward/group_zero_std_frac": 0.06666666939854622, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1624728709459305, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.1624728709459305, "signal/advantage_abs_mean": 0.5411736607551575, "signal/advantage_pre_scale_abs_mean": 0.5411736607551575, "signal/advantage_pre_scale_std": 0.6594692945480347, "signal/advantage_std": 0.6594692945480347, "signal/brier_reward/centered_abs_mean": 0.3189652979373932, "signal/brier_reward/group_std_mean": 0.37320741415023806, "signal/brier_reward/group_zero_std_frac": 0.002777777798473835, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.031896531209349634, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.031896531209349634, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.2224169671535492, "signal/confidence_uniqueness_reward/group_std_mean": 0.2778456211090088, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.002777777798473835, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02224169746041298, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.02224169746041298, "signal/format_reward/centered_abs_mean": 0.35045573115348816, "signal/format_reward/group_std_mean": 0.41651219725608823, "signal/format_reward/group_zero_std_frac": 0.008333333395421505, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.17522786557674408, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.17522786557674408, "signal/frontier_aurc_reward/centered_abs_mean": 0.3180912435054779, "signal/frontier_aurc_reward/group_std_mean": 0.37567706108093263, "signal/frontier_aurc_reward/group_zero_std_frac": 0.002777777798473835, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.00397614068351686, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.00397614068351686, "signal/frontier_ece_reward/centered_abs_mean": 0.3180912435054779, "signal/frontier_ece_reward/group_std_mean": 0.37567706108093263, "signal/frontier_ece_reward/group_zero_std_frac": 0.002777777798473835, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.03180912546813488, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.03180912546813488, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3786880075931549, "signal/frontier_entropy_batch_reward/group_std_mean": 0.43963631987571716, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.002777777798473835, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03786880299448967, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03786880299448967, "signal/volume_coverage_0/centered_abs_mean": 0.3180912435054779, "signal/volume_coverage_0/group_std_mean": 0.37567706108093263, "signal/volume_coverage_0/group_zero_std_frac": 0.002777777798473835, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 0.03180912546813488, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 0.03180912546813488, "signal/volume_coverage_1/centered_abs_mean": 0.3180912435054779, "signal/volume_coverage_1/group_std_mean": 0.37567706108093263, "signal/volume_coverage_1/group_zero_std_frac": 0.002777777798473835, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 0.03180912546813488, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 0.03180912546813488, "signal/volume_coverage_10/centered_abs_mean": 0.3180912435054779, "signal/volume_coverage_10/group_std_mean": 0.37567706108093263, "signal/volume_coverage_10/group_zero_std_frac": 0.002777777798473835, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 0.03180912546813488, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 0.03180912546813488, "signal/volume_coverage_15/centered_abs_mean": 0.3180912435054779, "signal/volume_coverage_15/group_std_mean": 0.37567706108093263, "signal/volume_coverage_15/group_zero_std_frac": 0.002777777798473835, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 0.03180912546813488, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 0.03180912546813488, "signal/volume_coverage_20/centered_abs_mean": 0.3180912435054779, "signal/volume_coverage_20/group_std_mean": 0.37567706108093263, "signal/volume_coverage_20/group_zero_std_frac": 0.002777777798473835, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 0.03180912546813488, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 0.03180912546813488, "signal/volume_coverage_25/centered_abs_mean": 0.3180912435054779, "signal/volume_coverage_25/group_std_mean": 0.37567706108093263, "signal/volume_coverage_25/group_zero_std_frac": 0.002777777798473835, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.03180912546813488, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 0.03180912546813488, "signal/volume_coverage_5/centered_abs_mean": 0.3180912435054779, "signal/volume_coverage_5/group_std_mean": 0.37567706108093263, "signal/volume_coverage_5/group_zero_std_frac": 0.002777777798473835, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 0.03180912546813488, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 0.03180912546813488, "step": 10 }, { "calibration/aurc": 0.5135904311928525, "calibration/batch_distribution_entropy": 0.281091964786097, "calibration/confidence_entropy": 0.22602702961388318, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.48452310285305156, "calibration/mean_confidence": 0.9164284363135969, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009895833333333326, "completions/max_length": 3757.0, "completions/max_terminated_length": 3757.0, "completions/mean_length": 459.0904541015625, "completions/mean_terminated_length": 463.7198120117188, "completions/min_length": 0.0, "completions/min_terminated_length": 64.8, "epoch": 0.03599955000562493, "grad_norm": 0.00195339135825634, "learning_rate": 1.7857142857142859e-06, "loss": -0.0134, "num_tokens": 26064624.0, "reward": 0.9528237581253052, "reward_std": 0.5587735056877137, "rewards/accuracy_reward": 0.3482638895511627, "rewards/brier_reward": 0.44555225372314455, "rewards/confidence_uniqueness_reward": 0.49732959270477295, "rewards/format_reward": 0.9381944417953492, "rewards/frontier_aurc_reward": 0.3752441704273224, "rewards/frontier_ece_reward": 0.3752441704273224, "rewards/frontier_entropy_batch_reward": -0.8957948088645935, "rewards/volume_coverage_0": 0.3752441704273224, "rewards/volume_coverage_1": 0.3752441704273224, "rewards/volume_coverage_10": 0.3752441704273224, "rewards/volume_coverage_15": 0.3752441704273224, "rewards/volume_coverage_20": 0.3752441704273224, "rewards/volume_coverage_25": 0.3752441704273224, "rewards/volume_coverage_5": 0.3752441704273224, "signal/accuracy_reward/centered_abs_mean": 0.32180989980697633, "signal/accuracy_reward/group_std_mean": 0.38036916255950926, "signal/accuracy_reward/group_zero_std_frac": 0.07500000223517418, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.16090494990348816, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.16090494990348816, "signal/advantage_abs_mean": 0.46738156080245974, "signal/advantage_pre_scale_abs_mean": 0.46738156080245974, "signal/advantage_pre_scale_std": 0.5751462697982788, "signal/advantage_std": 0.5751462697982788, "signal/brier_reward/centered_abs_mean": 0.30256916880607604, "signal/brier_reward/group_std_mean": 0.3543079555034637, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.030256916582584382, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.030256916582584382, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.1871120035648346, "signal/confidence_uniqueness_reward/group_std_mean": 0.2356875717639923, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.01871120072901249, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.01871120072901249, "signal/format_reward/centered_abs_mean": 0.10648871511220932, "signal/format_reward/group_std_mean": 0.18728102892637252, "signal/format_reward/group_zero_std_frac": 0.29444444477558135, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.05324435755610466, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.05324435755610466, "signal/frontier_aurc_reward/centered_abs_mean": 0.314031195640564, "signal/frontier_aurc_reward/group_std_mean": 0.3691504061222076, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.003925389749929309, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.003925389749929309, "signal/frontier_ece_reward/centered_abs_mean": 0.314031195640564, "signal/frontier_ece_reward/group_std_mean": 0.3691504061222076, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.031403117999434474, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.031403117999434474, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.17255694419145584, "signal/frontier_entropy_batch_reward/group_std_mean": 0.2781273782253265, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0888888917863369, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.017255694791674613, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.017255694791674613, "signal/volume_coverage_0/centered_abs_mean": 0.314031195640564, "signal/volume_coverage_0/group_std_mean": 0.3691504061222076, "signal/volume_coverage_0/group_zero_std_frac": 0.0, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 0.031403117999434474, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 0.031403117999434474, "signal/volume_coverage_1/centered_abs_mean": 0.314031195640564, "signal/volume_coverage_1/group_std_mean": 0.3691504061222076, "signal/volume_coverage_1/group_zero_std_frac": 0.0, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 0.031403117999434474, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 0.031403117999434474, "signal/volume_coverage_10/centered_abs_mean": 0.314031195640564, "signal/volume_coverage_10/group_std_mean": 0.3691504061222076, "signal/volume_coverage_10/group_zero_std_frac": 0.0, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 0.031403117999434474, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 0.031403117999434474, "signal/volume_coverage_15/centered_abs_mean": 0.314031195640564, "signal/volume_coverage_15/group_std_mean": 0.3691504061222076, "signal/volume_coverage_15/group_zero_std_frac": 0.0, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 0.031403117999434474, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 0.031403117999434474, "signal/volume_coverage_20/centered_abs_mean": 0.314031195640564, "signal/volume_coverage_20/group_std_mean": 0.3691504061222076, "signal/volume_coverage_20/group_zero_std_frac": 0.0, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 0.031403117999434474, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 0.031403117999434474, "signal/volume_coverage_25/centered_abs_mean": 0.314031195640564, "signal/volume_coverage_25/group_std_mean": 0.3691504061222076, "signal/volume_coverage_25/group_zero_std_frac": 0.0, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.031403117999434474, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 0.031403117999434474, "signal/volume_coverage_5/centered_abs_mean": 0.314031195640564, "signal/volume_coverage_5/group_std_mean": 0.3691504061222076, "signal/volume_coverage_5/group_zero_std_frac": 0.0, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 0.031403117999434474, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 0.031403117999434474, "step": 15 }, { "calibration/aurc": 0.4400840846735198, "calibration/batch_distribution_entropy": 0.38120191367823736, "calibration/buffer_distribution_entropy": 0.29442792961319336, "calibration/confidence_entropy": 0.29531402386614836, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.2295914079238342, "calibration/coverage@5%": 0.0, "calibration/ece": 0.36975757858520253, "calibration/mean_confidence": 0.8869711695530038, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00894097222222221, "completions/max_length": 4041.4, "completions/max_terminated_length": 4041.4, "completions/mean_length": 495.19219360351565, "completions/mean_terminated_length": 499.751171875, "completions/min_length": 0.0, "completions/min_terminated_length": 96.0, "epoch": 0.04799940000749991, "grad_norm": 0.0007564106490463018, "learning_rate": 2.380952380952381e-06, "loss": -0.0063, "num_tokens": 34882934.0, "reward": 0.8950790762901306, "reward_std": 0.34014883935451506, "rewards/accuracy_reward": 0.4636284828186035, "rewards/brier_reward": 0.5718536376953125, "rewards/confidence_uniqueness_reward": 0.5620216369628906, "rewards/format_reward": 0.9834201335906982, "rewards/frontier_aurc_reward": 0.18470853520557284, "rewards/frontier_ece_reward": 0.18528626561164857, "rewards/frontier_entropy_batch_reward": -0.9422903299331665, "rewards/volume_coverage_0": 0.18794108111264332, "rewards/volume_coverage_1": 0.18794108111264332, "rewards/volume_coverage_10": 0.18794108111264332, "rewards/volume_coverage_15": 0.18794108169834006, "rewards/volume_coverage_20": 0.1879410865440863, "rewards/volume_coverage_25": 0.18794108712614993, "rewards/volume_coverage_5": 0.18794108111264332, "signal/accuracy_reward/centered_abs_mean": 0.2878743469715118, "signal/accuracy_reward/group_std_mean": 0.35547043681144713, "signal/accuracy_reward/group_zero_std_frac": 0.09166666865348816, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1439371734857559, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.1439371734857559, "signal/advantage_abs_mean": 0.272026863694191, "signal/advantage_pre_scale_abs_mean": 0.272026863694191, "signal/advantage_pre_scale_std": 0.3537022441625595, "signal/advantage_std": 0.3537022441625595, "signal/brier_reward/centered_abs_mean": 0.2541215270757675, "signal/brier_reward/group_std_mean": 0.31329566836357114, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.025412153080105783, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.025412153080105783, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.19161063432693481, "signal/confidence_uniqueness_reward/group_std_mean": 0.2286382406949997, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.019161063805222513, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.019161063805222513, "signal/format_reward/centered_abs_mean": 0.030018445663154127, "signal/format_reward/group_std_mean": 0.06528293080627919, "signal/format_reward/group_zero_std_frac": 0.7027777791023254, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.015009222831577063, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.015009222831577063, "signal/frontier_aurc_reward/centered_abs_mean": 0.11436173026449978, "signal/frontier_aurc_reward/group_std_mean": 0.14147313190624117, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0014295217762992252, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0014295217762992252, "signal/frontier_ece_reward/centered_abs_mean": 0.19664273262023926, "signal/frontier_ece_reward/group_std_mean": 0.23796773850917816, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.019664275087416173, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.019664275087416173, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.10116236060857772, "signal/frontier_entropy_batch_reward/group_std_mean": 0.18831903338432313, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.30555555522441863, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01011623591184616, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01011623591184616, "signal/volume_coverage_0/centered_abs_mean": 0.11197410996287056, "signal/volume_coverage_0/group_std_mean": 0.1381310252406534, "signal/volume_coverage_0/group_zero_std_frac": 0.5555555582046509, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 0.011197412188379958, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 0.011197412188379958, "signal/volume_coverage_1/centered_abs_mean": 0.11197410996287056, "signal/volume_coverage_1/group_std_mean": 0.1381310252406534, "signal/volume_coverage_1/group_zero_std_frac": 0.5555555582046509, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 0.011197412188379958, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 0.011197412188379958, "signal/volume_coverage_10/centered_abs_mean": 0.11197410996287056, "signal/volume_coverage_10/group_std_mean": 0.1381310252406534, "signal/volume_coverage_10/group_zero_std_frac": 0.5555555582046509, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 0.011197412188379958, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 0.011197412188379958, "signal/volume_coverage_15/centered_abs_mean": 0.11197411371267538, "signal/volume_coverage_15/group_std_mean": 0.13813103191433065, "signal/volume_coverage_15/group_zero_std_frac": 0.4888888955116272, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 0.011197412563360465, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 0.011197412563360465, "signal/volume_coverage_20/centered_abs_mean": 0.11197412589439945, "signal/volume_coverage_20/group_std_mean": 0.13813105202186832, "signal/volume_coverage_20/group_zero_std_frac": 0.4888888955116272, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 0.011197413781532606, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 0.011197413781532606, "signal/volume_coverage_25/centered_abs_mean": 0.11197412752295328, "signal/volume_coverage_25/group_std_mean": 0.13813105459340544, "signal/volume_coverage_25/group_zero_std_frac": 0.4888888955116272, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.011197413944388224, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 0.011197413944388224, "signal/volume_coverage_5/centered_abs_mean": 0.11197410996287056, "signal/volume_coverage_5/group_std_mean": 0.1381310252406534, "signal/volume_coverage_5/group_zero_std_frac": 0.5555555582046509, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 0.011197412188379958, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 0.011197412188379958, "step": 20 }, { "calibration/aurc": 0.3659491213453068, "calibration/batch_distribution_entropy": 0.4747177255582935, "calibration/buffer_distribution_entropy": 0.33521721039781777, "calibration/confidence_entropy": 0.32741087611064146, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.019270833333333334, "calibration/coverage@20%": 0.019270833333333334, "calibration/coverage@25%": 0.21563344594594597, "calibration/coverage@30%": 0.3589189189189189, "calibration/coverage@5%": 0.0, "calibration/ece": 0.28585367982933974, "calibration/mean_confidence": 0.8670943425313327, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011284722222222232, "completions/max_length": 3761.6, "completions/max_terminated_length": 3761.6, "completions/mean_length": 535.991943359375, "completions/mean_terminated_length": 542.1177734375, "completions/min_length": 0.0, "completions/min_terminated_length": 105.8, "epoch": 0.05999925000937488, "grad_norm": 0.0008135500829666853, "learning_rate": 2.9761904761904763e-06, "loss": -0.0054, "num_tokens": 44182009.0, "reward": 0.797996187210083, "reward_std": 0.22051306068897247, "rewards/accuracy_reward": 0.5412326335906983, "rewards/brier_reward": 0.6488373517990113, "rewards/confidence_uniqueness_reward": 0.6406527280807495, "rewards/format_reward": 0.9836805462837219, "rewards/frontier_aurc_reward": -0.004265864612534642, "rewards/frontier_ece_reward": 0.010874219285324217, "rewards/frontier_entropy_batch_reward": -0.9444352984428406, "rewards/volume_coverage_0": 8.792097189180836e-10, "rewards/volume_coverage_1": 8.792097189180836e-10, "rewards/volume_coverage_10": 2.2515131453682e-09, "rewards/volume_coverage_15": 3.5743737192284187e-09, "rewards/volume_coverage_20": 5.067362618405013e-09, "rewards/volume_coverage_25": 1.9802690931491895e-08, "rewards/volume_coverage_5": 8.792097189180836e-10, "signal/accuracy_reward/centered_abs_mean": 0.27207574248313904, "signal/accuracy_reward/group_std_mean": 0.33846710324287416, "signal/accuracy_reward/group_zero_std_frac": 0.11944444477558136, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.13603787124156952, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.13603787124156952, "signal/advantage_abs_mean": 0.1727396160364151, "signal/advantage_pre_scale_abs_mean": 0.1727396160364151, "signal/advantage_pre_scale_std": 0.23474966883659362, "signal/advantage_std": 0.23474966883659362, "signal/brier_reward/centered_abs_mean": 0.22204743921756745, "signal/brier_reward/group_std_mean": 0.2769301772117615, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.022204744815826415, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.022204744815826415, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.15974532812833786, "signal/confidence_uniqueness_reward/group_std_mean": 0.1887336254119873, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.015974533185362814, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.015974533185362814, "signal/format_reward/centered_abs_mean": 0.02893880233168602, "signal/format_reward/group_std_mean": 0.058059143275022505, "signal/format_reward/group_zero_std_frac": 0.7500000119209289, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01446940116584301, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01446940116584301, "signal/frontier_aurc_reward/centered_abs_mean": 0.0031789666507393123, "signal/frontier_aurc_reward/group_std_mean": 0.004654883686453104, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.973708517150954e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.973708517150954e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.11750385165214539, "signal/frontier_ece_reward/group_std_mean": 0.14042254984378816, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.011750385351479053, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.011750385351479053, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.09630790203809739, "signal/frontier_entropy_batch_reward/group_std_mean": 0.18353441655635833, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.34166666865348816, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.009630790445953608, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.009630790445953608, "signal/volume_coverage_0/centered_abs_mean": 4.802972108919334e-09, "signal/volume_coverage_0/group_std_mean": 7.952000213862221e-09, "signal/volume_coverage_0/group_zero_std_frac": 0.7916666626930237, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 4.802972020101492e-10, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 4.802972020101492e-10, "signal/volume_coverage_1/centered_abs_mean": 4.802972108919334e-09, "signal/volume_coverage_1/group_std_mean": 7.952000213862221e-09, "signal/volume_coverage_1/group_zero_std_frac": 0.7916666626930237, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 4.802972020101492e-10, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 4.802972020101492e-10, "signal/volume_coverage_10/centered_abs_mean": 7.63057674824097e-09, "signal/volume_coverage_10/group_std_mean": 1.2189424847264264e-08, "signal/volume_coverage_10/group_zero_std_frac": 0.7777777731418609, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 7.630577059103416e-10, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 7.630577059103416e-10, "signal/volume_coverage_15/centered_abs_mean": 2.999649408863547e-08, "signal/volume_coverage_15/group_std_mean": 5.054619922617576e-08, "signal/volume_coverage_15/group_zero_std_frac": 0.6666666746139527, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 2.9996494010919862e-09, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 2.9996494010919862e-09, "signal/volume_coverage_20/centered_abs_mean": 5.12322889001382e-08, "signal/volume_coverage_20/group_std_mean": 8.749845026301273e-08, "signal/volume_coverage_20/group_zero_std_frac": 0.5916666865348816, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 5.123229118719763e-09, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 5.123229118719763e-09, "signal/volume_coverage_25/centered_abs_mean": 1.4009428435102221e-07, "signal/volume_coverage_25/group_std_mean": 2.419580729551285e-07, "signal/volume_coverage_25/group_zero_std_frac": 0.5361111283302307, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 1.4009429694095133e-08, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 1.4009429694095133e-08, "signal/volume_coverage_5/centered_abs_mean": 4.802972108919334e-09, "signal/volume_coverage_5/group_std_mean": 7.952000213862221e-09, "signal/volume_coverage_5/group_zero_std_frac": 0.7916666626930237, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 4.802972020101492e-10, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 4.802972020101492e-10, "step": 25 }, { "calibration/aurc": 0.290252031438701, "calibration/batch_distribution_entropy": 0.6670356474414376, "calibration/buffer_distribution_entropy": 0.3984471740039156, "calibration/confidence_entropy": 0.4580460844401667, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.013860939021464559, "calibration/coverage@15%": 0.013860939021464559, "calibration/coverage@20%": 0.09637460022365034, "calibration/coverage@25%": 0.27948800161988807, "calibration/coverage@30%": 0.6027563221881407, "calibration/coverage@5%": 0.0, "calibration/ece": 0.14154371937498994, "calibration/mean_confidence": 0.7846670285807927, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014756944444444442, "completions/max_length": 3983.8, "completions/max_terminated_length": 3983.8, "completions/mean_length": 596.7978393554688, "completions/mean_terminated_length": 605.7838012695313, "completions/min_length": 0.0, "completions/min_terminated_length": 131.0, "epoch": 0.07199910001124986, "grad_norm": 0.0004709550703410059, "learning_rate": 3.5714285714285718e-06, "loss": -0.0083, "num_tokens": 54167040.0, "reward": 0.8399909019470215, "reward_std": 0.1914364665746689, "rewards/accuracy_reward": 0.596788203716278, "rewards/brier_reward": 0.7140596985816956, "rewards/confidence_uniqueness_reward": 0.7248314023017883, "rewards/format_reward": 0.9833333373069764, "rewards/frontier_aurc_reward": -0.00332138747908175, "rewards/frontier_ece_reward": 0.016705350019037724, "rewards/frontier_entropy_batch_reward": -0.9558795094490051, "rewards/volume_coverage_0": -9.216823436408727e-10, "rewards/volume_coverage_1": -9.216823436408727e-10, "rewards/volume_coverage_10": -1.096071113732e-09, "rewards/volume_coverage_15": -1.5953446425021055e-09, "rewards/volume_coverage_20": -3.5614115323490126e-09, "rewards/volume_coverage_25": -3.806372817938275e-07, "rewards/volume_coverage_5": -9.216823436408727e-10, "signal/accuracy_reward/centered_abs_mean": 0.2374620258808136, "signal/accuracy_reward/group_std_mean": 0.3019864022731781, "signal/accuracy_reward/group_zero_std_frac": 0.1833333343267441, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1187310129404068, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.1187310129404068, "signal/advantage_abs_mean": 0.14635236859321593, "signal/advantage_pre_scale_abs_mean": 0.14635236859321593, "signal/advantage_pre_scale_std": 0.2144080013036728, "signal/advantage_std": 0.2144080013036728, "signal/brier_reward/centered_abs_mean": 0.17288005352020264, "signal/brier_reward/group_std_mean": 0.21867653727531433, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01728800553828478, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01728800553828478, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0963991716504097, "signal/confidence_uniqueness_reward/group_std_mean": 0.12703455239534378, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.009639917686581612, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.009639917686581612, "signal/format_reward/centered_abs_mean": 0.02838541641831398, "signal/format_reward/group_std_mean": 0.05298890024423599, "signal/format_reward/group_zero_std_frac": 0.7861111283302307, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01419270820915699, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01419270820915699, "signal/frontier_aurc_reward/centered_abs_mean": 0.0019600596046075226, "signal/frontier_aurc_reward/group_std_mean": 0.002990162093192339, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.4500745348632334e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.4500745348632334e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.07730323448777199, "signal/frontier_ece_reward/group_std_mean": 0.09534137547016144, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.007730323821306229, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.007730323821306229, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.07685805186629295, "signal/frontier_entropy_batch_reward/group_std_mean": 0.1518291175365448, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.4444444477558136, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.007685805577784777, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.007685805577784777, "signal/volume_coverage_0/centered_abs_mean": 4.0110336330112515e-09, "signal/volume_coverage_0/group_std_mean": 5.887272230831186e-09, "signal/volume_coverage_0/group_zero_std_frac": 0.9277777791023254, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 4.0110334810494754e-10, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 4.0110334810494754e-10, "signal/volume_coverage_1/centered_abs_mean": 4.0110336330112515e-09, "signal/volume_coverage_1/group_std_mean": 5.887272230831186e-09, "signal/volume_coverage_1/group_zero_std_frac": 0.9277777791023254, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 4.0110334810494754e-10, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 4.0110334810494754e-10, "signal/volume_coverage_10/centered_abs_mean": 4.491473498680065e-09, "signal/volume_coverage_10/group_std_mean": 6.584358708261462e-09, "signal/volume_coverage_10/group_zero_std_frac": 0.9277777791023254, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 4.4914740572610247e-10, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 4.4914740572610247e-10, "signal/volume_coverage_15/centered_abs_mean": 7.2037741438713e-09, "signal/volume_coverage_15/group_std_mean": 1.065876080730277e-08, "signal/volume_coverage_15/group_zero_std_frac": 0.8611111044883728, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 7.203774146646857e-10, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 7.203774146646857e-10, "signal/volume_coverage_20/centered_abs_mean": 1.8666235868103344e-08, "signal/volume_coverage_20/group_std_mean": 2.672936527670089e-08, "signal/volume_coverage_20/group_zero_std_frac": 0.7694444358348846, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 1.866623697277525e-09, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 1.866623697277525e-09, "signal/volume_coverage_25/centered_abs_mean": 1.0624473182341276e-06, "signal/volume_coverage_25/group_std_mean": 1.5403513770051447e-06, "signal/volume_coverage_25/group_zero_std_frac": 0.6305555611848831, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 1.0624474773013315e-07, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 1.0624474773013315e-07, "signal/volume_coverage_5/centered_abs_mean": 4.0110336330112515e-09, "signal/volume_coverage_5/group_std_mean": 5.887272230831186e-09, "signal/volume_coverage_5/group_zero_std_frac": 0.9277777791023254, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 4.0110334810494754e-10, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 4.0110334810494754e-10, "step": 30 }, { "calibration/aurc": 0.25534025811798067, "calibration/batch_distribution_entropy": 0.6576576613585013, "calibration/buffer_distribution_entropy": 0.49322937770285424, "calibration/confidence_entropy": 0.5542242160036144, "calibration/coverage@0%": 0.002638522427440633, "calibration/coverage@1%": 0.002638522427440633, "calibration/coverage@10%": 0.05399434253326074, "calibration/coverage@15%": 0.14363486724367822, "calibration/coverage@20%": 0.22828949830648027, "calibration/coverage@25%": 0.4233740844404405, "calibration/coverage@30%": 0.7761904761904762, "calibration/coverage@5%": 0.002638522427440633, "calibration/ece": 0.09020366927006476, "calibration/mean_confidence": 0.7040882964997379, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01692708333333335, "completions/max_length": 3935.8, "completions/max_terminated_length": 3935.8, "completions/mean_length": 619.6548706054688, "completions/mean_terminated_length": 630.3608642578125, "completions/min_length": 0.0, "completions/min_terminated_length": 172.0, "epoch": 0.08399895001312484, "grad_norm": 0.00048439911915920675, "learning_rate": 4.166666666666667e-06, "loss": -0.0091, "num_tokens": 64382904.0, "reward": 0.8553146839141845, "reward_std": 0.17324375808238984, "rewards/accuracy_reward": 0.63359375, "rewards/brier_reward": 0.7494083523750306, "rewards/confidence_uniqueness_reward": 0.6619056224822998, "rewards/format_reward": 0.9808159589767456, "rewards/frontier_aurc_reward": -0.0026945109479129315, "rewards/frontier_ece_reward": 0.012076981551945209, "rewards/frontier_entropy_batch_reward": -0.9419560909271241, "rewards/volume_coverage_0": -3.125959491256936e-11, "rewards/volume_coverage_1": -3.125959491256936e-11, "rewards/volume_coverage_10": -9.113416687966946e-11, "rewards/volume_coverage_15": 2.0238500209046604e-09, "rewards/volume_coverage_20": -1.1439319269612725e-09, "rewards/volume_coverage_25": -2.410270205643883e-10, "rewards/volume_coverage_5": -3.125959491256936e-11, "signal/accuracy_reward/centered_abs_mean": 0.20619032382965088, "signal/accuracy_reward/group_std_mean": 0.268117618560791, "signal/accuracy_reward/group_zero_std_frac": 0.2555555611848831, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.10309516191482544, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.10309516191482544, "signal/advantage_abs_mean": 0.12986526787281036, "signal/advantage_pre_scale_abs_mean": 0.12986526787281036, "signal/advantage_pre_scale_std": 0.19552622437477113, "signal/advantage_std": 0.19552622437477113, "signal/brier_reward/centered_abs_mean": 0.13110830038785934, "signal/brier_reward/group_std_mean": 0.1714950382709503, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013110830076038837, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013110830076038837, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.18433848023414612, "signal/confidence_uniqueness_reward/group_std_mean": 0.2146785318851471, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.01843384765088558, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.01843384765088558, "signal/format_reward/centered_abs_mean": 0.02930230051279068, "signal/format_reward/group_std_mean": 0.051526063680648805, "signal/format_reward/group_zero_std_frac": 0.8, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01465115025639534, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01465115025639534, "signal/frontier_aurc_reward/centered_abs_mean": 0.0012397329090163111, "signal/frontier_aurc_reward/group_std_mean": 0.00201979277189821, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.549666176288156e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.549666176288156e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.04581320658326149, "signal/frontier_ece_reward/group_std_mean": 0.06267823949456215, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004581320798024535, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004581320798024535, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0961960181593895, "signal/frontier_entropy_batch_reward/group_std_mean": 0.18250569701194763, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.361111119389534, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.009619602188467979, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.009619602188467979, "signal/volume_coverage_0/centered_abs_mean": 6.0636567766891855e-09, "signal/volume_coverage_0/group_std_mean": 7.854496608672434e-09, "signal/volume_coverage_0/group_zero_std_frac": 0.830555546283722, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 6.063656410315588e-10, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 6.063656410315588e-10, "signal/volume_coverage_1/centered_abs_mean": 6.0636567766891855e-09, "signal/volume_coverage_1/group_std_mean": 7.854496608672434e-09, "signal/volume_coverage_1/group_zero_std_frac": 0.830555546283722, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 6.063656410315588e-10, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 6.063656410315588e-10, "signal/volume_coverage_10/centered_abs_mean": 8.75501608987861e-09, "signal/volume_coverage_10/group_std_mean": 1.1354882900604934e-08, "signal/volume_coverage_10/group_zero_std_frac": 0.830555546283722, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 8.755016323025444e-10, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 8.755016323025444e-10, "signal/volume_coverage_15/centered_abs_mean": 1.7241145222612887e-08, "signal/volume_coverage_15/group_std_mean": 2.2463165993968914e-08, "signal/volume_coverage_15/group_zero_std_frac": 0.830555546283722, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 1.7241145178203965e-09, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 1.7241145178203965e-09, "signal/volume_coverage_20/centered_abs_mean": 3.248467468353056e-08, "signal/volume_coverage_20/group_std_mean": 4.19108733495932e-08, "signal/volume_coverage_20/group_zero_std_frac": 0.7638888835906983, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 3.2484676504296315e-09, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 3.2484676504296315e-09, "signal/volume_coverage_25/centered_abs_mean": 7.086284341539794e-08, "signal/volume_coverage_25/group_std_mean": 9.157187932373745e-08, "signal/volume_coverage_25/group_zero_std_frac": 0.7305555582046509, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 7.086284181667679e-09, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 7.086284181667679e-09, "signal/volume_coverage_5/centered_abs_mean": 6.0636567766891855e-09, "signal/volume_coverage_5/group_std_mean": 7.854496608672434e-09, "signal/volume_coverage_5/group_zero_std_frac": 0.830555546283722, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 6.063656410315588e-10, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 6.063656410315588e-10, "step": 35 }, { "calibration/aurc": 0.30536688998434725, "calibration/batch_distribution_entropy": 0.6904094763169336, "calibration/buffer_distribution_entropy": 0.5629806669593707, "calibration/confidence_entropy": 0.5380215214731939, "calibration/coverage@0%": 0.005221932114882507, "calibration/coverage@1%": 0.005221932114882507, "calibration/coverage@10%": 0.006266318537859007, "calibration/coverage@15%": 0.037655678347150126, "calibration/coverage@20%": 0.1277168750056618, "calibration/coverage@25%": 0.3079806315332236, "calibration/coverage@30%": 0.35630949733989137, "calibration/coverage@5%": 0.005221932114882507, "calibration/ece": 0.11611571468320019, "calibration/mean_confidence": 0.7227581189811529, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01640625, "completions/max_length": 3681.4, "completions/max_terminated_length": 3681.4, "completions/mean_length": 638.3761352539062, "completions/mean_terminated_length": 649.0361328125, "completions/min_length": 0.0, "completions/min_terminated_length": 173.4, "epoch": 0.09599880001499982, "grad_norm": 0.0004877288010902703, "learning_rate": 4.761904761904762e-06, "loss": -0.0111, "num_tokens": 74856517.0, "reward": 0.8764391064643859, "reward_std": 0.17399394512176514, "rewards/accuracy_reward": 0.6493923544883728, "rewards/brier_reward": 0.7531128406524659, "rewards/confidence_uniqueness_reward": 0.723856520652771, "rewards/format_reward": 0.9821180462837219, "rewards/frontier_aurc_reward": -0.002503009606152773, "rewards/frontier_ece_reward": 0.00990740694105625, "rewards/frontier_entropy_batch_reward": -0.8797252178192139, "rewards/volume_coverage_0": -9.722430145686634e-10, "rewards/volume_coverage_1": -9.722430145686634e-10, "rewards/volume_coverage_10": -1.6528835806994823e-09, "rewards/volume_coverage_15": -2.086966972153492e-09, "rewards/volume_coverage_20": -5.745023112790903e-09, "rewards/volume_coverage_25": -1.698299545438431e-08, "rewards/volume_coverage_5": -1.0142617808128752e-09, "signal/accuracy_reward/centered_abs_mean": 0.19512261152267457, "signal/accuracy_reward/group_std_mean": 0.25884974002838135, "signal/accuracy_reward/group_zero_std_frac": 0.272222226858139, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09756130576133729, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09756130576133729, "signal/advantage_abs_mean": 0.12614074647426604, "signal/advantage_pre_scale_abs_mean": 0.12614074647426604, "signal/advantage_pre_scale_std": 0.19753454029560089, "signal/advantage_std": 0.19753454029560089, "signal/brier_reward/centered_abs_mean": 0.13642587661743164, "signal/brier_reward/group_std_mean": 0.17877306342124938, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013642588630318642, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013642588630318642, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.13335272669792175, "signal/confidence_uniqueness_reward/group_std_mean": 0.16444715857505798, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.013335273042321205, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.013335273042321205, "signal/format_reward/centered_abs_mean": 0.030859375, "signal/format_reward/group_std_mean": 0.05699694380164146, "signal/format_reward/group_zero_std_frac": 0.7722222328186035, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0154296875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0154296875, "signal/frontier_aurc_reward/centered_abs_mean": 0.00158603445161134, "signal/frontier_aurc_reward/group_std_mean": 0.002575516002252698, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.9825430717901328e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.9825430717901328e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.04253996312618256, "signal/frontier_ece_reward/group_std_mean": 0.06416215375065804, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004253996396437287, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004253996396437287, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.19839468747377395, "signal/frontier_entropy_batch_reward/group_std_mean": 0.32432641088962555, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.08333333693444729, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.019839468784630297, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.019839468784630297, "signal/volume_coverage_0/centered_abs_mean": 5.006984452743391e-09, "signal/volume_coverage_0/group_std_mean": 6.621276663065778e-09, "signal/volume_coverage_0/group_zero_std_frac": 0.8722222208976745, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 5.006984546591931e-10, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 5.006984546591931e-10, "signal/volume_coverage_1/centered_abs_mean": 5.006984452743391e-09, "signal/volume_coverage_1/group_std_mean": 6.621276663065778e-09, "signal/volume_coverage_1/group_zero_std_frac": 0.8722222208976745, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 5.006984546591931e-10, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 5.006984546591931e-10, "signal/volume_coverage_10/centered_abs_mean": 1.854433347228679e-08, "signal/volume_coverage_10/group_std_mean": 2.3904849991152056e-08, "signal/volume_coverage_10/group_zero_std_frac": 0.8722222208976745, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 1.8544333788179934e-09, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 1.8544333788179934e-09, "signal/volume_coverage_15/centered_abs_mean": 2.2667969682560462e-08, "signal/volume_coverage_15/group_std_mean": 2.9268863091513885e-08, "signal/volume_coverage_15/group_zero_std_frac": 0.8527777671813965, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 2.2667969193541916e-09, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 2.2667969193541916e-09, "signal/volume_coverage_20/centered_abs_mean": 5.436813834691634e-08, "signal/volume_coverage_20/group_std_mean": 7.13955294884272e-08, "signal/volume_coverage_20/group_zero_std_frac": 0.6250000059604645, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 5.436813643733273e-09, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 5.436813643733273e-09, "signal/volume_coverage_25/centered_abs_mean": 9.411375607726314e-08, "signal/volume_coverage_25/group_std_mean": 1.2240776126049013e-07, "signal/volume_coverage_25/group_zero_std_frac": 0.5611111104488373, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 9.411375756496199e-09, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 9.411375756496199e-09, "signal/volume_coverage_5/centered_abs_mean": 1.1767533551060306e-08, "signal/volume_coverage_5/group_std_mean": 1.5185777238468924e-08, "signal/volume_coverage_5/group_zero_std_frac": 0.8722222208976745, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 1.1767534000180213e-09, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 1.1767534000180213e-09, "step": 40 }, { "calibration/aurc": 0.20959151987755686, "calibration/batch_distribution_entropy": 0.8048497108063604, "calibration/buffer_distribution_entropy": 0.610253581159669, "calibration/confidence_entropy": 0.5086275835583615, "calibration/coverage@0%": 0.01462140992167102, "calibration/coverage@1%": 0.01462140992167102, "calibration/coverage@10%": 0.06981299726511095, "calibration/coverage@15%": 0.17435541676288793, "calibration/coverage@20%": 0.4382758104297634, "calibration/coverage@25%": 0.7764967981818407, "calibration/coverage@30%": 0.969482288828338, "calibration/coverage@5%": 0.03694196825577507, "calibration/ece": 0.08464355289125539, "calibration/mean_confidence": 0.711897336887368, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016232638888888883, "completions/max_length": 3790.4, "completions/max_terminated_length": 3790.4, "completions/mean_length": 671.12412109375, "completions/mean_terminated_length": 682.1524047851562, "completions/min_length": 0.0, "completions/min_terminated_length": 191.6, "epoch": 0.1079986500168748, "grad_norm": 0.0004558164218906313, "learning_rate": 4.909638554216868e-06, "loss": -0.0118, "num_tokens": 85723131.0, "reward": 0.9220384001731873, "reward_std": 0.17496635913848876, "rewards/accuracy_reward": 0.6446180462837219, "rewards/brier_reward": 0.7602893829345703, "rewards/confidence_uniqueness_reward": 0.890224039554596, "rewards/format_reward": 0.9828125, "rewards/frontier_aurc_reward": -0.0022315266309306026, "rewards/frontier_ece_reward": 0.007368552498519421, "rewards/frontier_entropy_batch_reward": -0.5743717849254608, "rewards/volume_coverage_0": -4.818908792836307e-10, "rewards/volume_coverage_1": -4.818908792836307e-10, "rewards/volume_coverage_10": -7.52855996294377e-10, "rewards/volume_coverage_15": -9.36817042690441e-10, "rewards/volume_coverage_20": -4.016447382321209e-09, "rewards/volume_coverage_25": -8.517154528875359e-09, "rewards/volume_coverage_5": -4.818908792836307e-10, "signal/accuracy_reward/centered_abs_mean": 0.19286024272441865, "signal/accuracy_reward/group_std_mean": 0.2544838279485703, "signal/accuracy_reward/group_zero_std_frac": 0.27500000298023225, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09643012136220933, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09643012136220933, "signal/advantage_abs_mean": 0.13153678625822068, "signal/advantage_pre_scale_abs_mean": 0.13153678625822068, "signal/advantage_pre_scale_std": 0.19726994037628173, "signal/advantage_std": 0.19726994037628173, "signal/brier_reward/centered_abs_mean": 0.15091899931430816, "signal/brier_reward/group_std_mean": 0.19479366540908813, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015091900154948235, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.015091900154948235, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.07453014776110649, "signal/confidence_uniqueness_reward/group_std_mean": 0.09904419332742691, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007453015027567744, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007453015027567744, "signal/format_reward/centered_abs_mean": 0.02777777723968029, "signal/format_reward/group_std_mean": 0.04943772032856941, "signal/format_reward/group_zero_std_frac": 0.8055555582046509, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.013888888619840145, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.013888888619840145, "signal/frontier_aurc_reward/centered_abs_mean": 0.0021528689889237286, "signal/frontier_aurc_reward/group_std_mean": 0.003593483520671725, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.6910861197393388e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.6910861197393388e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.055251818150281906, "signal/frontier_ece_reward/group_std_mean": 0.08745218813419342, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0055251818150281904, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0055251818150281904, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.4188727140426636, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4806748628616333, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.04188727214932442, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.04188727214932442, "signal/volume_coverage_0/centered_abs_mean": 2.894947714882079e-09, "signal/volume_coverage_0/group_std_mean": 4.090193839179079e-09, "signal/volume_coverage_0/group_zero_std_frac": 0.9194444417953491, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 2.894947746800991e-10, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 2.894947746800991e-10, "signal/volume_coverage_1/centered_abs_mean": 2.894947714882079e-09, "signal/volume_coverage_1/group_std_mean": 4.090193839179079e-09, "signal/volume_coverage_1/group_zero_std_frac": 0.9194444417953491, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 2.894947746800991e-10, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 2.894947746800991e-10, "signal/volume_coverage_10/centered_abs_mean": 5.0805231688322294e-09, "signal/volume_coverage_10/group_std_mean": 7.21450300611437e-09, "signal/volume_coverage_10/group_zero_std_frac": 0.9194444417953491, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 5.080523245160062e-10, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 5.080523245160062e-10, "signal/volume_coverage_15/centered_abs_mean": 8.006840668262072e-09, "signal/volume_coverage_15/group_std_mean": 1.1326063742433946e-08, "signal/volume_coverage_15/group_zero_std_frac": 0.8388888835906982, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 8.006841072105697e-10, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 8.006841072105697e-10, "signal/volume_coverage_20/centered_abs_mean": 1.9538272122421318e-08, "signal/volume_coverage_20/group_std_mean": 2.731816258760844e-08, "signal/volume_coverage_20/group_zero_std_frac": 0.7027777850627899, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 1.9538273103580917e-09, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 1.9538273103580917e-09, "signal/volume_coverage_25/centered_abs_mean": 3.664357803856433e-08, "signal/volume_coverage_25/group_std_mean": 5.045283835158898e-08, "signal/volume_coverage_25/group_zero_std_frac": 0.6305555552244186, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 3.6643578893436056e-09, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 3.6643578893436056e-09, "signal/volume_coverage_5/centered_abs_mean": 2.894947714882079e-09, "signal/volume_coverage_5/group_std_mean": 4.090193839179079e-09, "signal/volume_coverage_5/group_zero_std_frac": 0.9194444417953491, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 2.894947746800991e-10, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 2.894947746800991e-10, "step": 45 }, { "calibration/aurc": 0.43461842487265534, "calibration/batch_distribution_entropy": 0.9674538610560927, "calibration/buffer_distribution_entropy": 0.6754656669440455, "calibration/confidence_entropy": 0.5427756092593146, "calibration/coverage@0%": 0.0010666666666666667, "calibration/coverage@1%": 0.0010666666666666667, "calibration/coverage@10%": 0.0010666666666666667, "calibration/coverage@15%": 0.020158585377814314, "calibration/coverage@20%": 0.02236244212712561, "calibration/coverage@25%": 0.06972652541040605, "calibration/coverage@30%": 0.24491702153885683, "calibration/coverage@5%": 0.0010666666666666667, "calibration/ece": 0.19636142539546436, "calibration/mean_confidence": 0.5191269183857361, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014409722222222188, "completions/max_length": 3298.4, "completions/max_terminated_length": 3298.4, "completions/mean_length": 676.2962646484375, "completions/mean_terminated_length": 686.1054077148438, "completions/min_length": 0.0, "completions/min_terminated_length": 165.4, "epoch": 0.11999850001874976, "grad_norm": 0.0005109410267323256, "learning_rate": 4.759036144578314e-06, "loss": -0.0114, "num_tokens": 96611664.0, "reward": 0.9440282225608826, "reward_std": 0.16083419620990752, "rewards/accuracy_reward": 0.6298611164093018, "rewards/brier_reward": 0.7107447504997253, "rewards/confidence_uniqueness_reward": 0.936470878124237, "rewards/format_reward": 0.9849826335906983, "rewards/frontier_aurc_reward": -0.0021832690108567476, "rewards/frontier_ece_reward": -0.00903816195204854, "rewards/frontier_entropy_batch_reward": -0.2718408614397049, "rewards/volume_coverage_0": -5.804163025202591e-10, "rewards/volume_coverage_1": -5.804163025202591e-10, "rewards/volume_coverage_10": -6.20904803960709e-10, "rewards/volume_coverage_15": -8.302681847593973e-10, "rewards/volume_coverage_20": -1.2872351011494753e-09, "rewards/volume_coverage_25": -6.472662150702746e-09, "rewards/volume_coverage_5": -5.804163025202591e-10, "signal/accuracy_reward/centered_abs_mean": 0.18740234375, "signal/accuracy_reward/group_std_mean": 0.24431885480880738, "signal/accuracy_reward/group_zero_std_frac": 0.32222222685813906, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.093701171875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.093701171875, "signal/advantage_abs_mean": 0.12112562209367753, "signal/advantage_pre_scale_abs_mean": 0.12112562209367753, "signal/advantage_pre_scale_std": 0.18272640705108642, "signal/advantage_std": 0.18272640705108642, "signal/brier_reward/centered_abs_mean": 0.20210157334804535, "signal/brier_reward/group_std_mean": 0.2504777073860168, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020210156962275506, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.020210156962275506, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03604005612432957, "signal/confidence_uniqueness_reward/group_std_mean": 0.05931617692112923, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0036040056031197308, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0036040056031197308, "signal/format_reward/centered_abs_mean": 0.024745008908212185, "signal/format_reward/group_std_mean": 0.04588761366903782, "signal/format_reward/group_zero_std_frac": 0.8111111164093018, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.012372504454106092, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.012372504454106092, "signal/frontier_aurc_reward/centered_abs_mean": 0.0016047300770878792, "signal/frontier_aurc_reward/group_std_mean": 0.0027223533019423486, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.0059126836713403e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.0059126836713403e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.06495674103498458, "signal/frontier_ece_reward/group_std_mean": 0.10239707678556442, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006495674047619105, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006495674047619105, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.35456995368003846, "signal/frontier_entropy_batch_reward/group_std_mean": 0.42566134333610534, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.035456997156143186, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.035456997156143186, "signal/volume_coverage_0/centered_abs_mean": 7.521659972642424e-09, "signal/volume_coverage_0/group_std_mean": 9.438697748387348e-09, "signal/volume_coverage_0/group_zero_std_frac": 0.9111111044883728, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 7.521660093379179e-10, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 7.521660093379179e-10, "signal/volume_coverage_1/centered_abs_mean": 7.521659972642424e-09, "signal/volume_coverage_1/group_std_mean": 9.438697748387348e-09, "signal/volume_coverage_1/group_zero_std_frac": 0.9111111044883728, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 7.521660093379179e-10, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 7.521660093379179e-10, "signal/volume_coverage_10/centered_abs_mean": 8.372495108321943e-09, "signal/volume_coverage_10/group_std_mean": 1.0491685134561558e-08, "signal/volume_coverage_10/group_zero_std_frac": 0.9111111044883728, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 8.372495495512222e-10, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 8.372495495512222e-10, "signal/volume_coverage_15/centered_abs_mean": 1.696994389455142e-08, "signal/volume_coverage_15/group_std_mean": 2.1219557444496218e-08, "signal/volume_coverage_15/group_zero_std_frac": 0.8777777671813964, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 1.6969943616995664e-09, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 1.6969943616995664e-09, "signal/volume_coverage_20/centered_abs_mean": 3.173251870158822e-08, "signal/volume_coverage_20/group_std_mean": 3.9903456272583074e-08, "signal/volume_coverage_20/group_zero_std_frac": 0.7833333373069763, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 3.1732520250349337e-09, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 3.1732520250349337e-09, "signal/volume_coverage_25/centered_abs_mean": 1.1645886166533614e-07, "signal/volume_coverage_25/group_std_mean": 1.4611043983592253e-07, "signal/volume_coverage_25/group_zero_std_frac": 0.600000011920929, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 1.1645886754951817e-08, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 1.1645886754951817e-08, "signal/volume_coverage_5/centered_abs_mean": 7.521659972642424e-09, "signal/volume_coverage_5/group_std_mean": 9.438697748387348e-09, "signal/volume_coverage_5/group_zero_std_frac": 0.9111111044883728, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 7.521660093379179e-10, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 7.521660093379179e-10, "step": 50 }, { "epoch": 0.11999850001874976, "eval_calibration/aurc": 0.26401178674977815, "eval_calibration/batch_distribution_entropy": 0.8917495100142864, "eval_calibration/buffer_distribution_entropy": 0.7278190128157204, "eval_calibration/confidence_entropy": 0.5585034578569301, "eval_calibration/coverage@0%": 0.10601478494623656, "eval_calibration/coverage@1%": 0.10601478494623656, "eval_calibration/coverage@10%": 0.1483534946236559, "eval_calibration/coverage@15%": 0.2056451612903226, "eval_calibration/coverage@20%": 0.41431451612903225, "eval_calibration/coverage@25%": 0.712029569892473, "eval_calibration/coverage@30%": 0.8333333333333334, "eval_calibration/coverage@5%": 0.10601478494623656, "eval_calibration/ece": 0.3042033713434902, "eval_calibration/mean_confidence": 0.4784197317315613, "eval_completions/clipped_ratio": 0.015625, "eval_completions/max_length": 2400.8333333333335, "eval_completions/max_terminated_length": 2400.8333333333335, "eval_completions/mean_length": 665.2201538085938, "eval_completions/mean_terminated_length": 675.7472432454427, "eval_completions/min_length": 52.5, "eval_completions/min_terminated_length": 213.66666666666666, "eval_loss": 0.0, "eval_num_tokens": 96611664.0, "eval_reward": 0.8650682667891184, "eval_reward_std": 0.25732239087422687, "eval_rewards/accuracy_reward": 0.629340281089147, "eval_rewards/brier_reward": 0.6964937647183737, "eval_rewards/confidence_uniqueness_reward": 0.8830659290154775, "eval_rewards/format_reward": 0.9835069477558136, "eval_rewards/frontier_aurc_reward": -0.002074420607338349, "eval_rewards/frontier_ece_reward": -0.009346982141626844, "eval_rewards/frontier_entropy_batch_reward": -0.9835069477558136, "eval_rewards/volume_coverage_0": -1.5854292647240105e-09, "eval_rewards/volume_coverage_1": -1.5854292647240105e-09, "eval_rewards/volume_coverage_10": -2.291667607993523e-09, "eval_rewards/volume_coverage_15": -3.071365599349729e-09, "eval_rewards/volume_coverage_20": -4.06165686500648e-09, "eval_rewards/volume_coverage_25": -9.001347433627386e-09, "eval_rewards/volume_coverage_5": -1.5854292647240105e-09, "eval_runtime": 206.62, "eval_samples_per_second": 4.84, "eval_signal/accuracy_reward/centered_abs_mean": 0.4482964376608531, "eval_signal/accuracy_reward/group_std_mean": 0.48037030796209973, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.22414821883042654, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.22414821883042654, "eval_signal/advantage_abs_mean": 0.22970441232124963, "eval_signal/advantage_pre_scale_abs_mean": 0.22970441232124963, "eval_signal/advantage_pre_scale_std": 0.2550656571984291, "eval_signal/advantage_std": 0.2550656571984291, "eval_signal/brier_reward/centered_abs_mean": 0.22326942533254623, "eval_signal/brier_reward/group_std_mean": 0.2752516021331151, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.022326942533254623, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.022326942533254623, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.058278885980447136, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.10321150409678619, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005827888535956542, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005827888535956542, "eval_signal/format_reward/centered_abs_mean": 0.031521267568071686, "eval_signal/format_reward/group_std_mean": 0.08134117722511292, "eval_signal/format_reward/group_zero_std_frac": 0.5833333532015482, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.015760633784035843, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.015760633784035843, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0012940190790686756, "eval_signal/frontier_aurc_reward/group_std_mean": 0.0025426297021719315, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.6175238215510035e-05, "eval_signal/frontier_aurc_reward/weight": 0.012500000186264515, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.6175238215510035e-05, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.06478989496827126, "eval_signal/frontier_ece_reward/group_std_mean": 0.10126168405016263, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0064789894968271255, "eval_signal/frontier_ece_reward/weight": 0.10000000149011612, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0064789894968271255, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.031521267568071686, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.08134117722511292, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.5833333532015482, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0031521269120275974, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0031521269120275974, "eval_signal/volume_coverage_0/centered_abs_mean": 6.963701263777248e-09, "eval_signal/volume_coverage_0/group_std_mean": 9.221368557632559e-09, "eval_signal/volume_coverage_0/group_zero_std_frac": 0.7500000298023224, "eval_signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 6.963701444188489e-10, "eval_signal/volume_coverage_0/weight": 0.10000000149011612, "eval_signal/volume_coverage_0/weighted_centered_abs_mean": 6.963701444188489e-10, "eval_signal/volume_coverage_1/centered_abs_mean": 6.963701263777248e-09, "eval_signal/volume_coverage_1/group_std_mean": 9.221368557632559e-09, "eval_signal/volume_coverage_1/group_zero_std_frac": 0.7500000298023224, "eval_signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 6.963701444188489e-10, "eval_signal/volume_coverage_1/weight": 0.10000000149011612, "eval_signal/volume_coverage_1/weighted_centered_abs_mean": 6.963701444188489e-10, "eval_signal/volume_coverage_10/centered_abs_mean": 1.4617537217690805e-08, "eval_signal/volume_coverage_10/group_std_mean": 1.9474527879204118e-08, "eval_signal/volume_coverage_10/group_zero_std_frac": 0.6944444676240286, "eval_signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 1.46175369956462e-09, "eval_signal/volume_coverage_10/weight": 0.10000000149011612, "eval_signal/volume_coverage_10/weighted_centered_abs_mean": 1.46175369956462e-09, "eval_signal/volume_coverage_15/centered_abs_mean": 2.0035404409609232e-08, "eval_signal/volume_coverage_15/group_std_mean": 2.7039340803428995e-08, "eval_signal/volume_coverage_15/group_zero_std_frac": 0.6944444676240286, "eval_signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 2.0035404076542327e-09, "eval_signal/volume_coverage_15/weight": 0.10000000149011612, "eval_signal/volume_coverage_15/weighted_centered_abs_mean": 2.0035404076542327e-09, "eval_signal/volume_coverage_20/centered_abs_mean": 2.6633061052846092e-08, "eval_signal/volume_coverage_20/group_std_mean": 3.595280454830411e-08, "eval_signal/volume_coverage_20/group_zero_std_frac": 0.6666666865348816, "eval_signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 2.6633059461526423e-09, "eval_signal/volume_coverage_20/weight": 0.10000000149011612, "eval_signal/volume_coverage_20/weighted_centered_abs_mean": 2.6633059461526423e-09, "eval_signal/volume_coverage_25/centered_abs_mean": 4.514647552097036e-08, "eval_signal/volume_coverage_25/group_std_mean": 6.072574250855685e-08, "eval_signal/volume_coverage_25/group_zero_std_frac": 0.5833333432674408, "eval_signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 4.514647766740154e-09, "eval_signal/volume_coverage_25/weight": 0.10000000149011612, "eval_signal/volume_coverage_25/weighted_centered_abs_mean": 4.514647766740154e-09, "eval_signal/volume_coverage_5/centered_abs_mean": 6.963701263777248e-09, "eval_signal/volume_coverage_5/group_std_mean": 9.221368557632559e-09, "eval_signal/volume_coverage_5/group_zero_std_frac": 0.7500000298023224, "eval_signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 6.963701444188489e-10, "eval_signal/volume_coverage_5/weight": 0.10000000149011612, "eval_signal/volume_coverage_5/weighted_centered_abs_mean": 6.963701444188489e-10, "eval_steps_per_second": 0.029, "step": 50 }, { "calibration/aurc": 0.29729865103465236, "calibration/batch_distribution_entropy": 0.9859283937759169, "calibration/buffer_distribution_entropy": 0.7551744033764768, "calibration/confidence_entropy": 0.5197851793470234, "calibration/coverage@0%": 0.00996825658572783, "calibration/coverage@1%": 0.00996825658572783, "calibration/coverage@10%": 0.057737285457118905, "calibration/coverage@15%": 0.2131178628849404, "calibration/coverage@20%": 0.2545876791579063, "calibration/coverage@25%": 0.3962292671531242, "calibration/coverage@30%": 0.5168830431759815, "calibration/coverage@5%": 0.01679240356735513, "calibration/ece": 0.24835822174264982, "calibration/mean_confidence": 0.50909457244184, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015538194444444441, "completions/max_length": 3740.6, "completions/max_terminated_length": 3740.6, "completions/mean_length": 693.9699584960938, "completions/mean_terminated_length": 705.0622924804687, "completions/min_length": 0.0, "completions/min_terminated_length": 171.8, "epoch": 0.13199835002062474, "grad_norm": 0.00044327336945571005, "learning_rate": 4.60843373493976e-06, "loss": -0.0124, "num_tokens": 107686774.0, "reward": 0.9535149216651917, "reward_std": 0.16562986373901367, "rewards/accuracy_reward": 0.643750011920929, "rewards/brier_reward": 0.6915101885795594, "rewards/confidence_uniqueness_reward": 0.93819739818573, "rewards/format_reward": 0.9842013835906982, "rewards/frontier_aurc_reward": -0.002180169289931655, "rewards/frontier_ece_reward": -0.010109073109924793, "rewards/frontier_entropy_batch_reward": -0.2239334464073181, "rewards/volume_coverage_0": -3.921182398225475e-09, "rewards/volume_coverage_1": -3.921182398225475e-09, "rewards/volume_coverage_10": -5.6717894131308455e-09, "rewards/volume_coverage_15": -7.224053330534552e-09, "rewards/volume_coverage_20": -3.3456322601210786e-08, "rewards/volume_coverage_25": -1.1800991570609653e-07, "rewards/volume_coverage_5": -3.921182398225475e-09, "signal/accuracy_reward/centered_abs_mean": 0.19537760615348815, "signal/accuracy_reward/group_std_mean": 0.25658329427242277, "signal/accuracy_reward/group_zero_std_frac": 0.2805555611848831, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09768880307674407, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09768880307674407, "signal/advantage_abs_mean": 0.12394773364067077, "signal/advantage_pre_scale_abs_mean": 0.12394773364067077, "signal/advantage_pre_scale_std": 0.18684935569763184, "signal/advantage_std": 0.18684935569763184, "signal/brier_reward/centered_abs_mean": 0.22628356218338014, "signal/brier_reward/group_std_mean": 0.27401196360588076, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.022628356888890266, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.022628356888890266, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03589446097612381, "signal/confidence_uniqueness_reward/group_std_mean": 0.06275491267442704, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003589446283876896, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003589446283876896, "signal/format_reward/centered_abs_mean": 0.0267686627805233, "signal/format_reward/group_std_mean": 0.052131906151771545, "signal/format_reward/group_zero_std_frac": 0.7750000119209289, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01338433139026165, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01338433139026165, "signal/frontier_aurc_reward/centered_abs_mean": 0.001548130135051906, "signal/frontier_aurc_reward/group_std_mean": 0.0025685901287943124, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.9351627634023318e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.9351627634023318e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.06824411302804947, "signal/frontier_ece_reward/group_std_mean": 0.0973386213183403, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006824411358684302, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006824411358684302, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31762467622756957, "signal/frontier_entropy_batch_reward/group_std_mean": 0.39661539196968076, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.031762467697262764, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.031762467697262764, "signal/volume_coverage_0/centered_abs_mean": 8.444079380165448e-09, "signal/volume_coverage_0/group_std_mean": 1.0913937664724927e-08, "signal/volume_coverage_0/group_zero_std_frac": 0.7444444537162781, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 8.444079213631995e-10, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 8.444079213631995e-10, "signal/volume_coverage_1/centered_abs_mean": 8.444079380165448e-09, "signal/volume_coverage_1/group_std_mean": 1.0913937664724927e-08, "signal/volume_coverage_1/group_zero_std_frac": 0.7444444537162781, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 8.444079213631995e-10, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 8.444079213631995e-10, "signal/volume_coverage_10/centered_abs_mean": 1.4421499627825796e-08, "signal/volume_coverage_10/group_std_mean": 1.8671647872281484e-08, "signal/volume_coverage_10/group_zero_std_frac": 0.7111111164093018, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 1.4421500160732848e-09, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 1.4421500160732848e-09, "signal/volume_coverage_15/centered_abs_mean": 2.36699455480327e-08, "signal/volume_coverage_15/group_std_mean": 3.06581595665989e-08, "signal/volume_coverage_15/group_zero_std_frac": 0.7027777910232544, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 2.3669946613846805e-09, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 2.3669946613846805e-09, "signal/volume_coverage_20/centered_abs_mean": 6.965113588108807e-08, "signal/volume_coverage_20/group_std_mean": 8.979286612031955e-08, "signal/volume_coverage_20/group_zero_std_frac": 0.5333333373069763, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 6.9651132639236835e-09, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 6.9651132639236835e-09, "signal/volume_coverage_25/centered_abs_mean": 2.0221143959275878e-07, "signal/volume_coverage_25/group_std_mean": 2.605379972209221e-07, "signal/volume_coverage_25/group_zero_std_frac": 0.4694444477558136, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 2.022114535815689e-08, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 2.022114535815689e-08, "signal/volume_coverage_5/centered_abs_mean": 8.444079380165448e-09, "signal/volume_coverage_5/group_std_mean": 1.0913937664724927e-08, "signal/volume_coverage_5/group_zero_std_frac": 0.7444444537162781, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 8.444079213631995e-10, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 8.444079213631995e-10, "step": 55 }, { "calibration/aurc": 0.3616251978595281, "calibration/batch_distribution_entropy": 0.9636990149237773, "calibration/buffer_distribution_entropy": 0.7942048852247912, "calibration/confidence_entropy": 0.48655814763306565, "calibration/coverage@0%": 0.017837015808552077, "calibration/coverage@1%": 0.017837015808552077, "calibration/coverage@10%": 0.05555821654347036, "calibration/coverage@15%": 0.18383875640449895, "calibration/coverage@20%": 0.2142845849848963, "calibration/coverage@25%": 0.29187982363828147, "calibration/coverage@30%": 0.4164518759879103, "calibration/coverage@5%": 0.02254905769336883, "calibration/ece": 0.23523124663164277, "calibration/mean_confidence": 0.6070413395957835, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.018836805555555537, "completions/max_length": 3748.8, "completions/max_terminated_length": 3748.8, "completions/mean_length": 731.7283813476563, "completions/mean_terminated_length": 745.8790405273437, "completions/min_length": 0.0, "completions/min_terminated_length": 200.0, "epoch": 0.14399820002249972, "grad_norm": 0.0004057070182170719, "learning_rate": 4.457831325301205e-06, "loss": -0.0151, "num_tokens": 119212861.0, "reward": 0.9394213914871216, "reward_std": 0.1709626942873001, "rewards/accuracy_reward": 0.6255208373069763, "rewards/brier_reward": 0.7058334589004517, "rewards/confidence_uniqueness_reward": 0.9315223217010498, "rewards/format_reward": 0.98046875, "rewards/frontier_aurc_reward": -0.0025447321124374865, "rewards/frontier_ece_reward": 0.0011185122653841971, "rewards/frontier_entropy_batch_reward": -0.27389044165611265, "rewards/volume_coverage_0": -4.0031589154665426e-11, "rewards/volume_coverage_1": -4.0031589154665426e-11, "rewards/volume_coverage_10": -4.686928956054714e-10, "rewards/volume_coverage_15": 7.733450324565183e-10, "rewards/volume_coverage_20": -1.1667458288400211e-08, "rewards/volume_coverage_25": -1.7245495342876894e-08, "rewards/volume_coverage_5": -4.0031589154665426e-11, "signal/accuracy_reward/centered_abs_mean": 0.19491102397441865, "signal/accuracy_reward/group_std_mean": 0.2536593437194824, "signal/accuracy_reward/group_zero_std_frac": 0.3, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09745551198720932, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09745551198720932, "signal/advantage_abs_mean": 0.12908945828676224, "signal/advantage_pre_scale_abs_mean": 0.12908945828676224, "signal/advantage_pre_scale_std": 0.1949590265750885, "signal/advantage_std": 0.1949590265750885, "signal/brier_reward/centered_abs_mean": 0.22423318326473235, "signal/brier_reward/group_std_mean": 0.27205477356910707, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.022423317655920982, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.022423317655920982, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0429903544485569, "signal/confidence_uniqueness_reward/group_std_mean": 0.06899664849042893, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004299035528674722, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004299035528674722, "signal/format_reward/centered_abs_mean": 0.03196072056889534, "signal/format_reward/group_std_mean": 0.05609421357512474, "signal/format_reward/group_zero_std_frac": 0.7805555582046508, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01598036028444767, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01598036028444767, "signal/frontier_aurc_reward/centered_abs_mean": 0.0023376439232379196, "signal/frontier_aurc_reward/group_std_mean": 0.0036471809260547163, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.922054991358891e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.922054991358891e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.06132573038339615, "signal/frontier_ece_reward/group_std_mean": 0.08886844366788864, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00613257298246026, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00613257298246026, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.35195544362068176, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4247310280799866, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0351955458521843, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0351955458521843, "signal/volume_coverage_0/centered_abs_mean": 7.865199025403058e-09, "signal/volume_coverage_0/group_std_mean": 1.0264956484817845e-08, "signal/volume_coverage_0/group_zero_std_frac": 0.7833333373069763, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 7.865199214140973e-10, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 7.865199214140973e-10, "signal/volume_coverage_1/centered_abs_mean": 7.865199025403058e-09, "signal/volume_coverage_1/group_std_mean": 1.0264956484817845e-08, "signal/volume_coverage_1/group_zero_std_frac": 0.7833333373069763, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 7.865199214140973e-10, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 7.865199214140973e-10, "signal/volume_coverage_10/centered_abs_mean": 1.0155620655538655e-08, "signal/volume_coverage_10/group_std_mean": 1.3240603413677832e-08, "signal/volume_coverage_10/group_zero_std_frac": 0.7833333373069763, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 1.0155620733254266e-09, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 1.0155620733254266e-09, "signal/volume_coverage_15/centered_abs_mean": 2.9428355929894678e-08, "signal/volume_coverage_15/group_std_mean": 3.860327986160428e-08, "signal/volume_coverage_15/group_zero_std_frac": 0.5583333432674408, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 2.9428356862482017e-09, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 2.9428356862482017e-09, "signal/volume_coverage_20/centered_abs_mean": 8.900074135453906e-08, "signal/volume_coverage_20/group_std_mean": 1.1687629637435748e-07, "signal/volume_coverage_20/group_zero_std_frac": 0.4083333432674408, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 8.900074099926769e-09, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 8.900074099926769e-09, "signal/volume_coverage_25/centered_abs_mean": 1.9333558469725177e-07, "signal/volume_coverage_25/group_std_mean": 2.5326322656837873e-07, "signal/volume_coverage_25/group_zero_std_frac": 0.336111119389534, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 1.9333558576306587e-08, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 1.9333558576306587e-08, "signal/volume_coverage_5/centered_abs_mean": 7.865199025403058e-09, "signal/volume_coverage_5/group_std_mean": 1.0264956484817845e-08, "signal/volume_coverage_5/group_zero_std_frac": 0.7833333373069763, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 7.865199214140973e-10, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 7.865199214140973e-10, "step": 60 }, { "calibration/aurc": 0.2726635652293484, "calibration/batch_distribution_entropy": 0.9804184648220506, "calibration/buffer_distribution_entropy": 0.8191348651993706, "calibration/confidence_entropy": 0.504464702619779, "calibration/coverage@0%": 0.019694467992829885, "calibration/coverage@1%": 0.019694467992829885, "calibration/coverage@10%": 0.030910192193535312, "calibration/coverage@15%": 0.20113148411819665, "calibration/coverage@20%": 0.4828118542224125, "calibration/coverage@25%": 0.5705832777945432, "calibration/coverage@30%": 0.6223547628605839, "calibration/coverage@5%": 0.025576820934006355, "calibration/ece": 0.2045513577079625, "calibration/mean_confidence": 0.5563930817294047, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01605902777777779, "completions/max_length": 3807.6, "completions/max_terminated_length": 3807.6, "completions/mean_length": 753.4966186523437, "completions/mean_terminated_length": 765.7596557617187, "completions/min_length": 0.0, "completions/min_terminated_length": 183.2, "epoch": 0.1559980500243747, "grad_norm": 0.000409733853302896, "learning_rate": 4.307228915662651e-06, "loss": -0.0118, "num_tokens": 130987190.0, "reward": 0.9614022612571717, "reward_std": 0.1574880450963974, "rewards/accuracy_reward": 0.6534722208976745, "rewards/brier_reward": 0.7185048341751099, "rewards/confidence_uniqueness_reward": 0.9371358990669251, "rewards/format_reward": 0.983506953716278, "rewards/frontier_aurc_reward": -0.002122586825862527, "rewards/frontier_ece_reward": 0.0014065916649997235, "rewards/frontier_entropy_batch_reward": -0.22765516638755798, "rewards/volume_coverage_0": -5.068013078646238e-10, "rewards/volume_coverage_1": -5.068013078646238e-10, "rewards/volume_coverage_10": -9.84125698988425e-10, "rewards/volume_coverage_15": -1.6737272082179011e-09, "rewards/volume_coverage_20": -2.9347400509749376e-09, "rewards/volume_coverage_25": -7.171816296391853e-09, "rewards/volume_coverage_5": -5.068013078646238e-10, "signal/accuracy_reward/centered_abs_mean": 0.1733398422598839, "signal/accuracy_reward/group_std_mean": 0.2318666011095047, "signal/accuracy_reward/group_zero_std_frac": 0.33333333134651183, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08666992112994194, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08666992112994194, "signal/advantage_abs_mean": 0.11658722907304764, "signal/advantage_pre_scale_abs_mean": 0.11658722907304764, "signal/advantage_pre_scale_std": 0.18216001391410827, "signal/advantage_std": 0.18216001391410827, "signal/brier_reward/centered_abs_mean": 0.21878646910190583, "signal/brier_reward/group_std_mean": 0.2677512466907501, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021878646686673164, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.021878646686673164, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03749167211353779, "signal/confidence_uniqueness_reward/group_std_mean": 0.0637543372809887, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0037491672672331335, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0037491672672331335, "signal/format_reward/centered_abs_mean": 0.02845052108168602, "signal/format_reward/group_std_mean": 0.05323779508471489, "signal/format_reward/group_zero_std_frac": 0.7833333373069763, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01422526054084301, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01422526054084301, "signal/frontier_aurc_reward/centered_abs_mean": 0.0019353487994521856, "signal/frontier_aurc_reward/group_std_mean": 0.0031194576993584635, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.419186057522893e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.419186057522893e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.06133586913347244, "signal/frontier_ece_reward/group_std_mean": 0.08665431588888169, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006133586913347244, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006133586913347244, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3109153091907501, "signal/frontier_entropy_batch_reward/group_std_mean": 0.38727723360061644, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.031091532111167906, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.031091532111167906, "signal/volume_coverage_0/centered_abs_mean": 2.2870564997390376e-09, "signal/volume_coverage_0/group_std_mean": 3.016555882862093e-09, "signal/volume_coverage_0/group_zero_std_frac": 0.9, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 2.2870566274146854e-10, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 2.2870566274146854e-10, "signal/volume_coverage_1/centered_abs_mean": 2.2870564997390376e-09, "signal/volume_coverage_1/group_std_mean": 3.016555882862093e-09, "signal/volume_coverage_1/group_zero_std_frac": 0.9, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 2.2870566274146854e-10, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 2.2870566274146854e-10, "signal/volume_coverage_10/centered_abs_mean": 8.391811423535955e-09, "signal/volume_coverage_10/group_std_mean": 1.086056132804103e-08, "signal/volume_coverage_10/group_zero_std_frac": 0.8583333253860473, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 8.391811218144696e-10, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 8.391811218144696e-10, "signal/volume_coverage_15/centered_abs_mean": 1.444390749227864e-08, "signal/volume_coverage_15/group_std_mean": 1.882116328344452e-08, "signal/volume_coverage_15/group_zero_std_frac": 0.8083333373069763, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 1.4443908041839038e-09, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 1.4443908041839038e-09, "signal/volume_coverage_20/centered_abs_mean": 2.9427038361617974e-08, "signal/volume_coverage_20/group_std_mean": 3.849100700392683e-08, "signal/volume_coverage_20/group_zero_std_frac": 0.65, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 2.9427039305307544e-09, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 2.9427039305307544e-09, "signal/volume_coverage_25/centered_abs_mean": 6.738196756117532e-08, "signal/volume_coverage_25/group_std_mean": 8.714540680188066e-08, "signal/volume_coverage_25/group_zero_std_frac": 0.522222226858139, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 6.738196756117531e-09, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 6.738196756117531e-09, "signal/volume_coverage_5/centered_abs_mean": 2.2870564997390376e-09, "signal/volume_coverage_5/group_std_mean": 3.016555882862093e-09, "signal/volume_coverage_5/group_zero_std_frac": 0.9, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 2.2870566274146854e-10, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 2.2870566274146854e-10, "step": 65 }, { "calibration/aurc": 0.3393896341269902, "calibration/batch_distribution_entropy": 0.9831179408403127, "calibration/buffer_distribution_entropy": 0.8426835690912935, "calibration/confidence_entropy": 0.5218072154111429, "calibration/coverage@0%": 0.007841300976716198, "calibration/coverage@1%": 0.007841300976716198, "calibration/coverage@10%": 0.00836213431004953, "calibration/coverage@15%": 0.00836213431004953, "calibration/coverage@20%": 0.01679714622969032, "calibration/coverage@25%": 0.11030523827709116, "calibration/coverage@30%": 0.3504912616795737, "calibration/coverage@5%": 0.007841300976716198, "calibration/ece": 0.21396587911327264, "calibration/mean_confidence": 0.53053761128133, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01571180555555558, "completions/max_length": 3301.0, "completions/max_terminated_length": 3301.0, "completions/mean_length": 747.0869018554688, "completions/mean_terminated_length": 759.0447265625, "completions/min_length": 0.0, "completions/min_terminated_length": 180.6, "epoch": 0.16799790002624967, "grad_norm": 0.00042153653339482844, "learning_rate": 4.156626506024097e-06, "loss": -0.0122, "num_tokens": 142671775.0, "reward": 0.9516011714935303, "reward_std": 0.155648335814476, "rewards/accuracy_reward": 0.6380208373069763, "rewards/brier_reward": 0.694776999950409, "rewards/confidence_uniqueness_reward": 0.9385874152183533, "rewards/format_reward": 0.9842013955116272, "rewards/frontier_aurc_reward": -0.00205742665566504, "rewards/frontier_ece_reward": -0.005769663273531478, "rewards/frontier_entropy_batch_reward": -0.2224372446537018, "rewards/volume_coverage_0": -1.3082197200531277e-10, "rewards/volume_coverage_1": -1.3082197200531277e-10, "rewards/volume_coverage_10": -1.3082197200531277e-10, "rewards/volume_coverage_15": -9.936441391911188e-10, "rewards/volume_coverage_20": -1.2587668718477829e-08, "rewards/volume_coverage_25": -1.5884197990256867e-08, "rewards/volume_coverage_5": -1.3082197200531277e-10, "signal/accuracy_reward/centered_abs_mean": 0.17438151240348815, "signal/accuracy_reward/group_std_mean": 0.22899937331676484, "signal/accuracy_reward/group_zero_std_frac": 0.35000000298023226, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08719075620174407, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08719075620174407, "signal/advantage_abs_mean": 0.11629650443792343, "signal/advantage_pre_scale_abs_mean": 0.11629650443792343, "signal/advantage_pre_scale_std": 0.17956087589263917, "signal/advantage_std": 0.17956087589263917, "signal/brier_reward/centered_abs_mean": 0.22307340502738954, "signal/brier_reward/group_std_mean": 0.27035006880760193, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.022307340800762177, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.022307340800762177, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03593003079295158, "signal/confidence_uniqueness_reward/group_std_mean": 0.05953029617667198, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003593003237619996, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003593003237619996, "signal/format_reward/centered_abs_mean": 0.02693142332136631, "signal/format_reward/group_std_mean": 0.04885709583759308, "signal/format_reward/group_zero_std_frac": 0.8055555582046509, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.013465711660683156, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.013465711660683156, "signal/frontier_aurc_reward/centered_abs_mean": 0.0016406909562647343, "signal/frontier_aurc_reward/group_std_mean": 0.002687893947586417, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.050863695330918e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.050863695330918e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.06620457619428635, "signal/frontier_ece_reward/group_std_mean": 0.09011965543031693, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006620457675307989, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006620457675307989, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.312404465675354, "signal/frontier_entropy_batch_reward/group_std_mean": 0.39026838541030884, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.031240447983145715, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.031240447983145715, "signal/volume_coverage_0/centered_abs_mean": 6.527382523380254e-10, "signal/volume_coverage_0/group_std_mean": 8.340057505717979e-10, "signal/volume_coverage_0/group_zero_std_frac": 0.9944444417953491, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 6.527382211130029e-11, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 6.527382211130029e-11, "signal/volume_coverage_1/centered_abs_mean": 6.527382523380254e-10, "signal/volume_coverage_1/group_std_mean": 8.340057505717979e-10, "signal/volume_coverage_1/group_zero_std_frac": 0.9944444417953491, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 6.527382211130029e-11, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 6.527382211130029e-11, "signal/volume_coverage_10/centered_abs_mean": 6.527382523380254e-10, "signal/volume_coverage_10/group_std_mean": 8.340057505717979e-10, "signal/volume_coverage_10/group_zero_std_frac": 0.9944444417953491, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 6.527382211130029e-11, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 6.527382211130029e-11, "signal/volume_coverage_15/centered_abs_mean": 2.0081497686952334e-09, "signal/volume_coverage_15/group_std_mean": 2.5946954695932335e-09, "signal/volume_coverage_15/group_zero_std_frac": 0.9277777671813965, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 2.0081497367763212e-10, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 2.0081497367763212e-10, "signal/volume_coverage_20/centered_abs_mean": 1.9240841941936536e-08, "signal/volume_coverage_20/group_std_mean": 2.483854966151e-08, "signal/volume_coverage_20/group_zero_std_frac": 0.7888888835906982, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 1.924084341853316e-09, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 1.924084341853316e-09, "signal/volume_coverage_25/centered_abs_mean": 2.63578932013786e-08, "signal/volume_coverage_25/group_std_mean": 3.40409355814586e-08, "signal/volume_coverage_25/group_zero_std_frac": 0.75, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 2.635789275728939e-09, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 2.635789275728939e-09, "signal/volume_coverage_5/centered_abs_mean": 6.527382523380254e-10, "signal/volume_coverage_5/group_std_mean": 8.340057505717979e-10, "signal/volume_coverage_5/group_zero_std_frac": 0.9944444417953491, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 6.527382211130029e-11, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 6.527382211130029e-11, "step": 70 }, { "calibration/aurc": 0.26635782488515874, "calibration/batch_distribution_entropy": 0.9667058065508993, "calibration/buffer_distribution_entropy": 0.8619489597654877, "calibration/confidence_entropy": 0.5343618937372703, "calibration/coverage@0%": 0.007416344462318554, "calibration/coverage@1%": 0.007416344462318554, "calibration/coverage@10%": 0.13569970920759128, "calibration/coverage@15%": 0.24482519000498995, "calibration/coverage@20%": 0.34556386311354187, "calibration/coverage@25%": 0.42643206444033765, "calibration/coverage@30%": 0.6321881541722456, "calibration/coverage@5%": 0.04180787885385294, "calibration/ece": 0.21680189415961587, "calibration/mean_confidence": 0.5755003843171993, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014670138888888884, "completions/max_length": 3507.6, "completions/max_terminated_length": 3507.6, "completions/mean_length": 736.2677124023437, "completions/mean_terminated_length": 747.2797119140625, "completions/min_length": 0.0, "completions/min_terminated_length": 227.2, "epoch": 0.17999775002812465, "grad_norm": 0.0004315000551287085, "learning_rate": 4.006024096385543e-06, "loss": -0.0117, "num_tokens": 154218475.0, "reward": 0.9771409153938293, "reward_std": 0.15745867788791656, "rewards/accuracy_reward": 0.6884548664093018, "rewards/brier_reward": 0.7252273917198181, "rewards/confidence_uniqueness_reward": 0.936829400062561, "rewards/format_reward": 0.9852430582046509, "rewards/frontier_aurc_reward": -0.0018611573614180088, "rewards/frontier_ece_reward": -0.003944494191091508, "rewards/frontier_entropy_batch_reward": -0.25496000945568087, "rewards/volume_coverage_0": -5.460884883179418e-10, "rewards/volume_coverage_1": -5.460884883179418e-10, "rewards/volume_coverage_10": -1.2820269373881032e-09, "rewards/volume_coverage_15": -2.6095399480174917e-09, "rewards/volume_coverage_20": -9.53145136151079e-09, "rewards/volume_coverage_25": -2.2126803767008597e-08, "rewards/volume_coverage_5": -5.460884883179418e-10, "signal/accuracy_reward/centered_abs_mean": 0.1789984792470932, "signal/accuracy_reward/group_std_mean": 0.2308933675289154, "signal/accuracy_reward/group_zero_std_frac": 0.3638888895511627, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0894992396235466, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0894992396235466, "signal/advantage_abs_mean": 0.11769915223121644, "signal/advantage_pre_scale_abs_mean": 0.11769915223121644, "signal/advantage_pre_scale_std": 0.1850135773420334, "signal/advantage_std": 0.1850135773420334, "signal/brier_reward/centered_abs_mean": 0.20045875906944274, "signal/brier_reward/group_std_mean": 0.24788658916950226, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020045876502990723, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.020045876502990723, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.036058619245886804, "signal/confidence_uniqueness_reward/group_std_mean": 0.06366177275776863, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0036058619152754545, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0036058619152754545, "signal/format_reward/centered_abs_mean": 0.026019965298473835, "signal/format_reward/group_std_mean": 0.05199590064585209, "signal/format_reward/group_zero_std_frac": 0.775, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.013009982649236917, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.013009982649236917, "signal/frontier_aurc_reward/centered_abs_mean": 0.0016442745458334685, "signal/frontier_aurc_reward/group_std_mean": 0.0026633210014551877, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.055343247775454e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.055343247775454e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.05721868574619293, "signal/frontier_ece_reward/group_std_mean": 0.08083326071500778, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005721868854016066, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005721868854016066, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3330303609371185, "signal/frontier_entropy_batch_reward/group_std_mean": 0.40677814483642577, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03330303654074669, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03330303654074669, "signal/volume_coverage_0/centered_abs_mean": 1.969390450717867e-09, "signal/volume_coverage_0/group_std_mean": 2.5594340091750213e-09, "signal/volume_coverage_0/group_zero_std_frac": 0.925, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 1.9693905742301788e-10, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 1.9693905742301788e-10, "signal/volume_coverage_1/centered_abs_mean": 1.969390450717867e-09, "signal/volume_coverage_1/group_std_mean": 2.5594340091750213e-09, "signal/volume_coverage_1/group_zero_std_frac": 0.925, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 1.9693905742301788e-10, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 1.9693905742301788e-10, "signal/volume_coverage_10/centered_abs_mean": 4.858173541233413e-09, "signal/volume_coverage_10/group_std_mean": 6.3628841862062305e-09, "signal/volume_coverage_10/group_zero_std_frac": 0.8305555582046509, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 4.858173523192288e-10, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 4.858173523192288e-10, "signal/volume_coverage_15/centered_abs_mean": 1.0683860851568028e-08, "signal/volume_coverage_15/group_std_mean": 1.3964444456338044e-08, "signal/volume_coverage_15/group_zero_std_frac": 0.725, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 1.0683861344229494e-09, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 1.0683861344229494e-09, "signal/volume_coverage_20/centered_abs_mean": 3.9493965964254586e-08, "signal/volume_coverage_20/group_std_mean": 5.174840893573673e-08, "signal/volume_coverage_20/group_zero_std_frac": 0.6194444358348846, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 3.94939684289497e-09, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 3.94939684289497e-09, "signal/volume_coverage_25/centered_abs_mean": 8.536078723864194e-08, "signal/volume_coverage_25/group_std_mean": 1.1200458835958215e-07, "signal/volume_coverage_25/group_zero_std_frac": 0.5416666686534881, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 8.53607891038166e-09, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 8.53607891038166e-09, "signal/volume_coverage_5/centered_abs_mean": 1.969390450717867e-09, "signal/volume_coverage_5/group_std_mean": 2.5594340091750213e-09, "signal/volume_coverage_5/group_zero_std_frac": 0.925, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 1.9693905742301788e-10, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 1.9693905742301788e-10, "step": 75 }, { "calibration/aurc": 0.2429387059383234, "calibration/batch_distribution_entropy": 0.9529905402102818, "calibration/buffer_distribution_entropy": 0.8738844216235699, "calibration/confidence_entropy": 0.5125112143308769, "calibration/coverage@0%": 0.010054332874111489, "calibration/coverage@1%": 0.010054332874111489, "calibration/coverage@10%": 0.06927349297909838, "calibration/coverage@15%": 0.3246663063262004, "calibration/coverage@20%": 0.535180322504506, "calibration/coverage@25%": 0.6291437787886591, "calibration/coverage@30%": 0.6753869481875662, "calibration/coverage@5%": 0.017928348622142987, "calibration/ece": 0.20932305149161134, "calibration/mean_confidence": 0.6136822693300799, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013888888888888885, "completions/max_length": 3717.2, "completions/max_terminated_length": 3717.2, "completions/mean_length": 752.978662109375, "completions/mean_terminated_length": 763.6736328125, "completions/min_length": 0.0, "completions/min_terminated_length": 190.0, "epoch": 0.19199760002999963, "grad_norm": 0.00040160753997042775, "learning_rate": 3.855421686746989e-06, "loss": -0.0103, "num_tokens": 165946069.0, "reward": 0.9624287486076355, "reward_std": 0.1525136023759842, "rewards/accuracy_reward": 0.6596354246139526, "rewards/brier_reward": 0.7311666131019592, "rewards/confidence_uniqueness_reward": 0.9364403367042542, "rewards/format_reward": 0.9856770873069763, "rewards/frontier_aurc_reward": -0.0021038626320660113, "rewards/frontier_ece_reward": 0.0012513543479144573, "rewards/frontier_entropy_batch_reward": -0.27087019085884095, "rewards/volume_coverage_0": -4.568020739448286e-10, "rewards/volume_coverage_1": -4.568020739448286e-10, "rewards/volume_coverage_10": -2.0638052045107712e-09, "rewards/volume_coverage_15": -2.9442815886737693e-09, "rewards/volume_coverage_20": -4.258802924206328e-09, "rewards/volume_coverage_25": -9.412961088361981e-09, "rewards/volume_coverage_5": -4.568020739448286e-10, "signal/accuracy_reward/centered_abs_mean": 0.17906358242034912, "signal/accuracy_reward/group_std_mean": 0.23346365988254547, "signal/accuracy_reward/group_zero_std_frac": 0.34722223281860354, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08953179121017456, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08953179121017456, "signal/advantage_abs_mean": 0.1167010024189949, "signal/advantage_pre_scale_abs_mean": 0.1167010024189949, "signal/advantage_pre_scale_std": 0.17792364954948425, "signal/advantage_std": 0.17792364954948425, "signal/brier_reward/centered_abs_mean": 0.192233869433403, "signal/brier_reward/group_std_mean": 0.23862990140914916, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019223386794328688, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.019223386794328688, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.031697943806648254, "signal/confidence_uniqueness_reward/group_std_mean": 0.0505401112139225, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003169794473797083, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003169794473797083, "signal/format_reward/centered_abs_mean": 0.02013346329331398, "signal/format_reward/group_std_mean": 0.03664347417652607, "signal/format_reward/group_zero_std_frac": 0.8527777791023254, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01006673164665699, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01006673164665699, "signal/frontier_aurc_reward/centered_abs_mean": 0.001982904877513647, "signal/frontier_aurc_reward/group_std_mean": 0.0031933929305523632, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.4786311405478047e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.4786311405478047e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.05222774744033813, "signal/frontier_ece_reward/group_std_mean": 0.07532420605421067, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00522277494892478, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00522277494892478, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33515734076499937, "signal/frontier_entropy_batch_reward/group_std_mean": 0.40675837397575376, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03351573422551155, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03351573422551155, "signal/volume_coverage_0/centered_abs_mean": 2.5033016437525645e-09, "signal/volume_coverage_0/group_std_mean": 3.3354313089262176e-09, "signal/volume_coverage_0/group_zero_std_frac": 0.8972222208976746, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 2.503301503586908e-10, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 2.503301503586908e-10, "signal/volume_coverage_1/centered_abs_mean": 2.5033016437525645e-09, "signal/volume_coverage_1/group_std_mean": 3.3354313089262176e-09, "signal/volume_coverage_1/group_zero_std_frac": 0.8972222208976746, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 2.503301503586908e-10, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 2.503301503586908e-10, "signal/volume_coverage_10/centered_abs_mean": 7.160277382123326e-09, "signal/volume_coverage_10/group_std_mean": 9.599346739719294e-09, "signal/volume_coverage_10/group_zero_std_frac": 0.8472222149372101, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 7.160277197548748e-10, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 7.160277197548748e-10, "signal/volume_coverage_15/centered_abs_mean": 1.2688166350915964e-08, "signal/volume_coverage_15/group_std_mean": 1.6908503219781323e-08, "signal/volume_coverage_15/group_zero_std_frac": 0.7361111164093017, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 1.2688165622332104e-09, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 1.2688165622332104e-09, "signal/volume_coverage_20/centered_abs_mean": 1.812260547184863e-08, "signal/volume_coverage_20/group_std_mean": 2.409684691784264e-08, "signal/volume_coverage_20/group_zero_std_frac": 0.6916666626930237, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 1.8122604854287071e-09, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 1.8122604854287071e-09, "signal/volume_coverage_25/centered_abs_mean": 7.594725190962492e-08, "signal/volume_coverage_25/group_std_mean": 1.0130850087719523e-07, "signal/volume_coverage_25/group_zero_std_frac": 0.6416666686534882, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 7.594725902337895e-09, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 7.594725902337895e-09, "signal/volume_coverage_5/centered_abs_mean": 2.5033016437525645e-09, "signal/volume_coverage_5/group_std_mean": 3.3354313089262176e-09, "signal/volume_coverage_5/group_zero_std_frac": 0.8972222208976746, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 2.503301503586908e-10, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 2.503301503586908e-10, "step": 80 }, { "calibration/aurc": 0.272205180148172, "calibration/batch_distribution_entropy": 0.9839264217170511, "calibration/buffer_distribution_entropy": 0.8838308061064566, "calibration/confidence_entropy": 0.5268860344745087, "calibration/coverage@0%": 0.02139878392542739, "calibration/coverage@1%": 0.02139878392542739, "calibration/coverage@10%": 0.06428258517319922, "calibration/coverage@15%": 0.13265800491797142, "calibration/coverage@20%": 0.2533631740237014, "calibration/coverage@25%": 0.3971892928952113, "calibration/coverage@30%": 0.6274081366442885, "calibration/coverage@5%": 0.02244045059209406, "calibration/ece": 0.18339377635586349, "calibration/mean_confidence": 0.5337971173985369, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009548611111111115, "completions/max_length": 3463.0, "completions/max_terminated_length": 3463.0, "completions/mean_length": 743.687255859375, "completions/mean_terminated_length": 750.9121826171875, "completions/min_length": 0.0, "completions/min_terminated_length": 227.8, "epoch": 0.2039974500318746, "grad_norm": 0.00039456840022467077, "learning_rate": 3.7048192771084342e-06, "loss": -0.007, "num_tokens": 177600546.0, "reward": 0.977763569355011, "reward_std": 0.14530135691165924, "rewards/accuracy_reward": 0.68359375, "rewards/brier_reward": 0.7319249629974365, "rewards/confidence_uniqueness_reward": 0.9425673007965087, "rewards/format_reward": 0.9901041626930237, "rewards/frontier_aurc_reward": -0.0018502724356949329, "rewards/frontier_ece_reward": -0.0009264084612368606, "rewards/frontier_entropy_batch_reward": -0.2641885936260223, "rewards/volume_coverage_0": -1.9229808995158136e-10, "rewards/volume_coverage_1": -1.9229808995158136e-10, "rewards/volume_coverage_10": -4.5319727554060664e-10, "rewards/volume_coverage_15": -1.2661274056968707e-09, "rewards/volume_coverage_20": -4.186512286841193e-09, "rewards/volume_coverage_25": -5.821830104113523e-09, "rewards/volume_coverage_5": -1.9229808995158136e-10, "signal/accuracy_reward/centered_abs_mean": 0.16773546040058135, "signal/accuracy_reward/group_std_mean": 0.2227248728275299, "signal/accuracy_reward/group_zero_std_frac": 0.3527777850627899, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08386773020029067, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08386773020029067, "signal/advantage_abs_mean": 0.1090440571308136, "signal/advantage_pre_scale_abs_mean": 0.1090440571308136, "signal/advantage_pre_scale_std": 0.1700405955314636, "signal/advantage_std": 0.1700405955314636, "signal/brier_reward/centered_abs_mean": 0.1963264286518097, "signal/brier_reward/group_std_mean": 0.2435892939567566, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019632643461227416, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.019632643461227416, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.028791505470871927, "signal/confidence_uniqueness_reward/group_std_mean": 0.048539139330387115, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0028791506309062244, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0028791506309062244, "signal/format_reward/centered_abs_mean": 0.01761067733168602, "signal/format_reward/group_std_mean": 0.03504730835556984, "signal/format_reward/group_zero_std_frac": 0.8527777910232544, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00880533866584301, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00880533866584301, "signal/frontier_aurc_reward/centered_abs_mean": 0.001829707226715982, "signal/frontier_aurc_reward/group_std_mean": 0.0029482690151780844, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.287134120706469e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.287134120706469e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.05751297697424888, "signal/frontier_ece_reward/group_std_mean": 0.08030709475278855, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005751297902315855, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005751297902315855, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33534626960754393, "signal/frontier_entropy_batch_reward/group_std_mean": 0.40883824825286863, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03353462740778923, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03353462740778923, "signal/volume_coverage_0/centered_abs_mean": 8.776430915347078e-10, "signal/volume_coverage_0/group_std_mean": 1.13681020152967e-09, "signal/volume_coverage_0/group_zero_std_frac": 0.9944444417953491, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 8.776430762691411e-11, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 8.776430762691411e-11, "signal/volume_coverage_1/centered_abs_mean": 8.776430915347078e-10, "signal/volume_coverage_1/group_std_mean": 1.13681020152967e-09, "signal/volume_coverage_1/group_zero_std_frac": 0.9944444417953491, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 8.776430762691411e-11, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 8.776430762691411e-11, "signal/volume_coverage_10/centered_abs_mean": 1.6664782886977036e-09, "signal/volume_coverage_10/group_std_mean": 2.1400602179255657e-09, "signal/volume_coverage_10/group_zero_std_frac": 0.9611111044883728, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 1.6664783095143854e-10, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 1.6664783095143854e-10, "signal/volume_coverage_15/centered_abs_mean": 1.3187805891767824e-08, "signal/volume_coverage_15/group_std_mean": 1.7327763579544352e-08, "signal/volume_coverage_15/group_zero_std_frac": 0.8333333373069763, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 1.3187805479597524e-09, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 1.3187805479597524e-09, "signal/volume_coverage_20/centered_abs_mean": 2.027334065068942e-08, "signal/volume_coverage_20/group_std_mean": 2.671985650515296e-08, "signal/volume_coverage_20/group_zero_std_frac": 0.7722222328186035, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 2.0273341443111104e-09, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 2.0273341443111104e-09, "signal/volume_coverage_25/centered_abs_mean": 3.823409808756395e-08, "signal/volume_coverage_25/group_std_mean": 5.032821552841682e-08, "signal/volume_coverage_25/group_zero_std_frac": 0.7694444537162781, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 3.823410063552579e-09, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 3.823410063552579e-09, "signal/volume_coverage_5/centered_abs_mean": 8.776430915347078e-10, "signal/volume_coverage_5/group_std_mean": 1.13681020152967e-09, "signal/volume_coverage_5/group_zero_std_frac": 0.9944444417953491, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 8.776430762691411e-11, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 8.776430762691411e-11, "step": 85 }, { "calibration/aurc": 0.19922293559866006, "calibration/batch_distribution_entropy": 0.9824476168092499, "calibration/buffer_distribution_entropy": 0.8959630638539714, "calibration/confidence_entropy": 0.5189281704536632, "calibration/coverage@0%": 0.025054624089309953, "calibration/coverage@1%": 0.025054624089309953, "calibration/coverage@10%": 0.256557141472063, "calibration/coverage@15%": 0.38917092578741797, "calibration/coverage@20%": 0.5117063492063492, "calibration/coverage@25%": 0.6346587161242334, "calibration/coverage@30%": 0.8457187767532595, "calibration/coverage@5%": 0.09872118168093298, "calibration/ece": 0.23940879525551675, "calibration/mean_confidence": 0.5118995725139233, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010329861111111093, "completions/max_length": 3339.2, "completions/max_terminated_length": 3339.2, "completions/mean_length": 700.8325561523437, "completions/mean_terminated_length": 708.1759155273437, "completions/min_length": 0.0, "completions/min_terminated_length": 197.0, "epoch": 0.2159973000337496, "grad_norm": 0.00047273669042624533, "learning_rate": 3.5542168674698798e-06, "loss": -0.0082, "num_tokens": 188742809.0, "reward": 0.9794747948646545, "reward_std": 0.14528339505195617, "rewards/accuracy_reward": 0.6794270873069763, "rewards/brier_reward": 0.7353934526443482, "rewards/confidence_uniqueness_reward": 0.9436571598052979, "rewards/format_reward": 0.9894965171813965, "rewards/frontier_aurc_reward": -0.0016743445303291082, "rewards/frontier_ece_reward": 0.0010622843401506543, "rewards/frontier_entropy_batch_reward": -0.2297738403081894, "rewards/volume_coverage_0": -3.8754242245864213e-10, "rewards/volume_coverage_1": -3.8754242245864213e-10, "rewards/volume_coverage_10": -5.763823057725049e-10, "rewards/volume_coverage_15": -6.379984013316075e-10, "rewards/volume_coverage_20": -3.930808684105002e-09, "rewards/volume_coverage_25": -1.0270524869460118e-08, "rewards/volume_coverage_5": -3.8754242245864213e-10, "signal/accuracy_reward/centered_abs_mean": 0.17314995527267457, "signal/accuracy_reward/group_std_mean": 0.22633666098117827, "signal/accuracy_reward/group_zero_std_frac": 0.3527777850627899, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08657497763633729, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08657497763633729, "signal/advantage_abs_mean": 0.10956384688615799, "signal/advantage_pre_scale_abs_mean": 0.10956384688615799, "signal/advantage_pre_scale_std": 0.1703871190547943, "signal/advantage_std": 0.1703871190547943, "signal/brier_reward/centered_abs_mean": 0.19396249949932098, "signal/brier_reward/group_std_mean": 0.24135461449623108, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019396250322461128, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.019396250322461128, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.028895640373229982, "signal/confidence_uniqueness_reward/group_std_mean": 0.04839541018009186, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0028895641677081584, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0028895641677081584, "signal/format_reward/centered_abs_mean": 0.01848415769636631, "signal/format_reward/group_std_mean": 0.035885289683938025, "signal/format_reward/group_zero_std_frac": 0.85, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.009242078848183155, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.009242078848183155, "signal/frontier_aurc_reward/centered_abs_mean": 0.0016561457188799978, "signal/frontier_aurc_reward/group_std_mean": 0.002781048696488142, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.0701821995317005e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.0701821995317005e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.06060823351144791, "signal/frontier_ece_reward/group_std_mean": 0.08322409242391586, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0060608237981796265, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0060608237981796265, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3175591230392456, "signal/frontier_entropy_batch_reward/group_std_mean": 0.39123265743255614, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03175591304898262, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03175591304898262, "signal/volume_coverage_0/centered_abs_mean": 1.2535025073479033e-09, "signal/volume_coverage_0/group_std_mean": 1.6031172722641428e-09, "signal/volume_coverage_0/group_zero_std_frac": 0.9583333373069763, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 1.2535024684900974e-10, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 1.2535024684900974e-10, "signal/volume_coverage_1/centered_abs_mean": 1.2535025073479033e-09, "signal/volume_coverage_1/group_std_mean": 1.6031172722641428e-09, "signal/volume_coverage_1/group_zero_std_frac": 0.9583333373069763, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 1.2535024684900974e-10, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 1.2535024684900974e-10, "signal/volume_coverage_10/centered_abs_mean": 2.88981866769511e-09, "signal/volume_coverage_10/group_std_mean": 3.737770304379495e-09, "signal/volume_coverage_10/group_zero_std_frac": 0.9166666626930237, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 2.8898186482662067e-10, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 2.8898186482662067e-10, "signal/volume_coverage_15/centered_abs_mean": 3.634362555349213e-09, "signal/volume_coverage_15/group_std_mean": 4.710481982250769e-09, "signal/volume_coverage_15/group_zero_std_frac": 0.8611111164093017, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 3.6343624776336016e-10, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 3.6343624776336016e-10, "signal/volume_coverage_20/centered_abs_mean": 1.5966736732764274e-08, "signal/volume_coverage_20/group_std_mean": 2.0725850369274212e-08, "signal/volume_coverage_20/group_zero_std_frac": 0.7833333253860474, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 1.5966736904848843e-09, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 1.5966736904848843e-09, "signal/volume_coverage_25/centered_abs_mean": 3.288729004680135e-08, "signal/volume_coverage_25/group_std_mean": 4.266394340035617e-08, "signal/volume_coverage_25/group_zero_std_frac": 0.7222222208976745, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 3.288729222283848e-09, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 3.288729222283848e-09, "signal/volume_coverage_5/centered_abs_mean": 1.2535025073479033e-09, "signal/volume_coverage_5/group_std_mean": 1.6031172722641428e-09, "signal/volume_coverage_5/group_zero_std_frac": 0.9583333373069763, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 1.2535024684900974e-10, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 1.2535024684900974e-10, "step": 90 }, { "calibration/aurc": 0.2507009224960528, "calibration/batch_distribution_entropy": 0.9775486373906073, "calibration/buffer_distribution_entropy": 0.9065992578768183, "calibration/confidence_entropy": 0.5001866055085517, "calibration/coverage@0%": 0.03300766861293068, "calibration/coverage@1%": 0.03300766861293068, "calibration/coverage@10%": 0.2548171806438221, "calibration/coverage@15%": 0.40902696201277405, "calibration/coverage@20%": 0.5034949578671087, "calibration/coverage@25%": 0.538191739190496, "calibration/coverage@30%": 0.6138651747478934, "calibration/coverage@5%": 0.0885765078483404, "calibration/ece": 0.2086511975868341, "calibration/mean_confidence": 0.5522644108583503, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.007204861111111116, "completions/max_length": 3247.8, "completions/max_terminated_length": 3247.8, "completions/mean_length": 693.4934814453125, "completions/mean_terminated_length": 698.5267456054687, "completions/min_length": 0.0, "completions/min_terminated_length": 173.0, "epoch": 0.22799715003562457, "grad_norm": 0.0003818366676568985, "learning_rate": 3.4036144578313257e-06, "loss": -0.0053, "num_tokens": 199823534.0, "reward": 0.9745383620262146, "reward_std": 0.13535202145576478, "rewards/accuracy_reward": 0.6660590291023254, "rewards/brier_reward": 0.7394654631614686, "rewards/confidence_uniqueness_reward": 0.9451273560523987, "rewards/format_reward": 0.9927951335906983, "rewards/frontier_aurc_reward": -0.001798482658341527, "rewards/frontier_ece_reward": 0.0039430757868103685, "rewards/frontier_entropy_batch_reward": -0.237198144197464, "rewards/volume_coverage_0": -1.48676847916418e-10, "rewards/volume_coverage_1": -1.48676847916418e-10, "rewards/volume_coverage_10": 1.4221258060054254e-10, "rewards/volume_coverage_15": -9.984373341986873e-10, "rewards/volume_coverage_20": -1.1343260775120712e-09, "rewards/volume_coverage_25": -1.9268936113370216e-09, "rewards/volume_coverage_5": -1.48676847916418e-10, "signal/accuracy_reward/centered_abs_mean": 0.15666775405406952, "signal/accuracy_reward/group_std_mean": 0.20973491668701172, "signal/accuracy_reward/group_zero_std_frac": 0.397222226858139, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07833387702703476, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07833387702703476, "signal/advantage_abs_mean": 0.10032767802476883, "signal/advantage_pre_scale_abs_mean": 0.10032767802476883, "signal/advantage_pre_scale_std": 0.15808959305286407, "signal/advantage_std": 0.15808959305286407, "signal/brier_reward/centered_abs_mean": 0.18711373209953308, "signal/brier_reward/group_std_mean": 0.2359489381313324, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01871137283742428, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01871137283742428, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02431831918656826, "signal/confidence_uniqueness_reward/group_std_mean": 0.04133199006319046, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0024318320211023092, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0024318320211023092, "signal/format_reward/centered_abs_mean": 0.01224500872194767, "signal/format_reward/group_std_mean": 0.026611294224858285, "signal/format_reward/group_zero_std_frac": 0.875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.006122504360973835, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.006122504360973835, "signal/frontier_aurc_reward/centered_abs_mean": 0.0018433568766340613, "signal/frontier_aurc_reward/group_std_mean": 0.003007926885038614, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.3041959866532125e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.3041959866532125e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.05773395150899887, "signal/frontier_ece_reward/group_std_mean": 0.07975224107503891, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005773395299911499, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005773395299911499, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3170252025127411, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3918557822704315, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.031702518835663794, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.031702518835663794, "signal/volume_coverage_0/centered_abs_mean": 8.475213697511918e-10, "signal/volume_coverage_0/group_std_mean": 1.120057463444013e-09, "signal/volume_coverage_0/group_zero_std_frac": 0.9666666626930237, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 8.475213968128781e-11, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 8.475213968128781e-11, "signal/volume_coverage_1/centered_abs_mean": 8.475213697511918e-10, "signal/volume_coverage_1/group_std_mean": 1.120057463444013e-09, "signal/volume_coverage_1/group_zero_std_frac": 0.9666666626930237, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 8.475213968128781e-11, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 8.475213968128781e-11, "signal/volume_coverage_10/centered_abs_mean": 2.524260600544892e-09, "signal/volume_coverage_10/group_std_mean": 3.2873440358738293e-09, "signal/volume_coverage_10/group_zero_std_frac": 0.9333333253860474, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 2.524260685893287e-10, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 2.524260685893287e-10, "signal/volume_coverage_15/centered_abs_mean": 5.845151940242488e-09, "signal/volume_coverage_15/group_std_mean": 7.716072530916662e-09, "signal/volume_coverage_15/group_zero_std_frac": 0.7861111164093018, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 5.845151931915815e-10, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 5.845151931915815e-10, "signal/volume_coverage_20/centered_abs_mean": 7.86484022352596e-09, "signal/volume_coverage_20/group_std_mean": 1.0402011918486664e-08, "signal/volume_coverage_20/group_zero_std_frac": 0.7722222328186035, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 7.864840639859594e-10, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 7.864840639859594e-10, "signal/volume_coverage_25/centered_abs_mean": 1.438830108391187e-08, "signal/volume_coverage_25/group_std_mean": 1.8884527674956075e-08, "signal/volume_coverage_25/group_zero_std_frac": 0.661111107468605, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 1.4388301466938812e-09, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 1.4388301466938812e-09, "signal/volume_coverage_5/centered_abs_mean": 8.475213697511918e-10, "signal/volume_coverage_5/group_std_mean": 1.120057463444013e-09, "signal/volume_coverage_5/group_zero_std_frac": 0.9666666626930237, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 8.475213968128781e-11, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 8.475213968128781e-11, "step": 95 }, { "calibration/aurc": 0.18728217917272516, "calibration/batch_distribution_entropy": 0.9694950506314033, "calibration/buffer_distribution_entropy": 0.9121722739252384, "calibration/confidence_entropy": 0.5021543179603883, "calibration/coverage@0%": 0.017372255964316463, "calibration/coverage@1%": 0.017372255964316463, "calibration/coverage@10%": 0.11814513525377532, "calibration/coverage@15%": 0.4494851837396229, "calibration/coverage@20%": 0.6551994681070942, "calibration/coverage@25%": 0.8486937388591802, "calibration/coverage@30%": 0.9043421345811051, "calibration/coverage@5%": 0.028346665413135363, "calibration/ece": 0.17114394458500604, "calibration/mean_confidence": 0.5716688069748909, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011458333333333348, "completions/max_length": 3231.4, "completions/max_terminated_length": 3231.4, "completions/mean_length": 706.346533203125, "completions/mean_terminated_length": 714.49365234375, "completions/min_length": 0.0, "completions/min_terminated_length": 212.2, "epoch": 0.23999700003749952, "grad_norm": 0.0004011181299574673, "learning_rate": 3.2530120481927713e-06, "loss": -0.0082, "num_tokens": 211059718.0, "reward": 0.9819074869155884, "reward_std": 0.13814267814159392, "rewards/accuracy_reward": 0.6813368082046509, "rewards/brier_reward": 0.7655829429626465, "rewards/confidence_uniqueness_reward": 0.940147602558136, "rewards/format_reward": 0.9884548544883728, "rewards/frontier_aurc_reward": -0.0016520792851224542, "rewards/frontier_ece_reward": 0.010339464247226714, "rewards/frontier_entropy_batch_reward": -0.24574714303016662, "rewards/volume_coverage_0": 7.541852403009086e-11, "rewards/volume_coverage_1": 7.541852403009086e-11, "rewards/volume_coverage_10": -2.984458390489775e-10, "rewards/volume_coverage_15": -3.88511360756294e-10, "rewards/volume_coverage_20": -6.244302493477183e-10, "rewards/volume_coverage_25": -5.88479067697989e-09, "rewards/volume_coverage_5": 7.541852403009086e-11, "signal/accuracy_reward/centered_abs_mean": 0.16082356572151185, "signal/accuracy_reward/group_std_mean": 0.21090216040611268, "signal/accuracy_reward/group_zero_std_frac": 0.40277777910232543, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08041178286075593, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08041178286075593, "signal/advantage_abs_mean": 0.10463630557060241, "signal/advantage_pre_scale_abs_mean": 0.10463630557060241, "signal/advantage_pre_scale_std": 0.16416526734828948, "signal/advantage_std": 0.16416526734828948, "signal/brier_reward/centered_abs_mean": 0.17226437330245972, "signal/brier_reward/group_std_mean": 0.21906766891479493, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017226437106728554, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.017226437106728554, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.029767391458153724, "signal/confidence_uniqueness_reward/group_std_mean": 0.049494147300720215, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0029767390806227922, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0029767390806227922, "signal/format_reward/centered_abs_mean": 0.018419053964316846, "signal/format_reward/group_std_mean": 0.035861417278647424, "signal/format_reward/group_zero_std_frac": 0.8444444417953492, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.009209526982158423, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.009209526982158423, "signal/frontier_aurc_reward/centered_abs_mean": 0.0018402117071673273, "signal/frontier_aurc_reward/group_std_mean": 0.002930039027705789, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.300264750374481e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.300264750374481e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.0525245763361454, "signal/frontier_ece_reward/group_std_mean": 0.0730134516954422, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005252457968890667, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005252457968890667, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32134751677513124, "signal/frontier_entropy_batch_reward/group_std_mean": 0.393948096036911, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.032134751230478285, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032134751230478285, "signal/volume_coverage_0/centered_abs_mean": 8.663491951610425e-10, "signal/volume_coverage_0/group_std_mean": 1.1307943567628876e-09, "signal/volume_coverage_0/group_zero_std_frac": 0.9722222208976745, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 8.663492540549045e-11, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 8.663492540549045e-11, "signal/volume_coverage_1/centered_abs_mean": 8.663491951610425e-10, "signal/volume_coverage_1/group_std_mean": 1.1307943567628876e-09, "signal/volume_coverage_1/group_zero_std_frac": 0.9722222208976745, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 8.663492540549045e-11, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 8.663492540549045e-11, "signal/volume_coverage_10/centered_abs_mean": 4.449955628982316e-09, "signal/volume_coverage_10/group_std_mean": 5.785257982393599e-09, "signal/volume_coverage_10/group_zero_std_frac": 0.8777777791023255, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 4.4499558877163226e-10, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 4.4499558877163226e-10, "signal/volume_coverage_15/centered_abs_mean": 6.421526446698245e-09, "signal/volume_coverage_15/group_std_mean": 8.361979440818512e-09, "signal/volume_coverage_15/group_zero_std_frac": 0.8277777791023254, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 6.421526364038671e-10, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 6.421526364038671e-10, "signal/volume_coverage_20/centered_abs_mean": 8.840661205877787e-09, "signal/volume_coverage_20/group_std_mean": 1.1587044899952215e-08, "signal/volume_coverage_20/group_zero_std_frac": 0.7861111104488373, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 8.840661395222854e-10, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 8.840661395222854e-10, "signal/volume_coverage_25/centered_abs_mean": 3.609103034207806e-08, "signal/volume_coverage_25/group_std_mean": 4.8652312312214006e-08, "signal/volume_coverage_25/group_zero_std_frac": 0.622222226858139, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 3.6091031579976727e-09, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 3.6091031579976727e-09, "signal/volume_coverage_5/centered_abs_mean": 8.663491951610425e-10, "signal/volume_coverage_5/group_std_mean": 1.1307943567628876e-09, "signal/volume_coverage_5/group_zero_std_frac": 0.9722222208976745, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 8.663492540549045e-11, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 8.663492540549045e-11, "step": 100 }, { "epoch": 0.23999700003749952, "eval_calibration/aurc": 0.21511125000674836, "eval_calibration/batch_distribution_entropy": 0.91558582212121, "eval_calibration/buffer_distribution_entropy": 0.916314646286911, "eval_calibration/confidence_entropy": 0.5371354515997979, "eval_calibration/coverage@0%": 0.11544578853046594, "eval_calibration/coverage@1%": 0.11544578853046594, "eval_calibration/coverage@10%": 0.2196124551971326, "eval_calibration/coverage@15%": 0.3522513440860215, "eval_calibration/coverage@20%": 0.5835013440860215, "eval_calibration/coverage@25%": 0.7750336021505376, "eval_calibration/coverage@30%": 0.9894153225806451, "eval_calibration/coverage@5%": 0.11544578853046594, "eval_calibration/ece": 0.2672830977154876, "eval_calibration/mean_confidence": 0.5755224611157268, "eval_completions/clipped_ratio": 0.009548611111111105, "eval_completions/max_length": 2202.6666666666665, "eval_completions/max_terminated_length": 2202.6666666666665, "eval_completions/mean_length": 694.9336954752604, "eval_completions/mean_terminated_length": 701.6080830891927, "eval_completions/min_length": 51.5, "eval_completions/min_terminated_length": 259.0, "eval_loss": 0.0, "eval_num_tokens": 211059718.0, "eval_reward": 0.8966165979703268, "eval_reward_std": 0.24938206871350607, "eval_rewards/accuracy_reward": 0.671006957689921, "eval_rewards/brier_reward": 0.7574487229188284, "eval_rewards/confidence_uniqueness_reward": 0.885144849618276, "eval_rewards/format_reward": 0.9904513855775198, "eval_rewards/frontier_aurc_reward": -0.001605092897079885, "eval_rewards/frontier_ece_reward": 0.006932677895141144, "eval_rewards/frontier_entropy_batch_reward": -0.9904513855775198, "eval_rewards/volume_coverage_0": -5.618685963541814e-11, "eval_rewards/volume_coverage_1": -5.618685963541814e-11, "eval_rewards/volume_coverage_10": -4.00475674685342e-10, "eval_rewards/volume_coverage_15": -1.2276121017443474e-09, "eval_rewards/volume_coverage_20": -1.3579284266774145e-09, "eval_rewards/volume_coverage_25": -2.1892862060261345e-09, "eval_rewards/volume_coverage_5": -5.618685963541814e-11, "eval_runtime": 182.1784, "eval_samples_per_second": 5.489, "eval_signal/accuracy_reward/centered_abs_mean": 0.4327799429496129, "eval_signal/accuracy_reward/group_std_mean": 0.47195852796236676, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.21638997147480646, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.21638997147480646, "eval_signal/advantage_abs_mean": 0.2223157857855161, "eval_signal/advantage_pre_scale_abs_mean": 0.2223157857855161, "eval_signal/advantage_pre_scale_std": 0.24726740519205728, "eval_signal/advantage_std": 0.24726740519205728, "eval_signal/brier_reward/centered_abs_mean": 0.2004946768283844, "eval_signal/brier_reward/group_std_mean": 0.25172630945841473, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02004946768283844, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.02004946768283844, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.053160481775800385, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.08579375346501668, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0053160480844477815, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0053160480844477815, "eval_signal/format_reward/centered_abs_mean": 0.018391926928112905, "eval_signal/format_reward/group_std_mean": 0.051025692683955036, "eval_signal/format_reward/group_zero_std_frac": 0.722222238779068, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.009195963464056453, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.009195963464056453, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0020544196401412287, "eval_signal/frontier_aurc_reward/group_std_mean": 0.0037250108628844223, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.5680247138855822e-05, "eval_signal/frontier_aurc_reward/weight": 0.012500000186264515, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.5680247138855822e-05, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.05547807924449444, "eval_signal/frontier_ece_reward/group_std_mean": 0.07796540856361389, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005547808172802131, "eval_signal/frontier_ece_reward/weight": 0.10000000149011612, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005547808172802131, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.018391926928112905, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.051025692683955036, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.722222238779068, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0018391927005723119, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0018391927005723119, "eval_signal/volume_coverage_0/centered_abs_mean": 9.294568471355605e-10, "eval_signal/volume_coverage_0/group_std_mean": 1.2967965983453251e-09, "eval_signal/volume_coverage_0/group_zero_std_frac": 0.944444457689921, "eval_signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 9.294568789388243e-11, "eval_signal/volume_coverage_0/weight": 0.10000000149011612, "eval_signal/volume_coverage_0/weighted_centered_abs_mean": 9.294568789388243e-11, "eval_signal/volume_coverage_1/centered_abs_mean": 9.294568471355605e-10, "eval_signal/volume_coverage_1/group_std_mean": 1.2967965983453251e-09, "eval_signal/volume_coverage_1/group_zero_std_frac": 0.944444457689921, "eval_signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 9.294568789388243e-11, "eval_signal/volume_coverage_1/weight": 0.10000000149011612, "eval_signal/volume_coverage_1/weighted_centered_abs_mean": 9.294568789388243e-11, "eval_signal/volume_coverage_10/centered_abs_mean": 1.7053908031120872e-09, "eval_signal/volume_coverage_10/group_std_mean": 2.3690177065420017e-09, "eval_signal/volume_coverage_10/group_zero_std_frac": 0.9166666865348816, "eval_signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 1.7053908372283155e-10, "eval_signal/volume_coverage_10/weight": 0.10000000149011612, "eval_signal/volume_coverage_10/weighted_centered_abs_mean": 1.7053908372283155e-10, "eval_signal/volume_coverage_15/centered_abs_mean": 4.71717971397186e-09, "eval_signal/volume_coverage_15/group_std_mean": 6.529600547755532e-09, "eval_signal/volume_coverage_15/group_zero_std_frac": 0.8333333532015482, "eval_signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 4.717179271617374e-10, "eval_signal/volume_coverage_15/weight": 0.10000000149011612, "eval_signal/volume_coverage_15/weighted_centered_abs_mean": 4.717179271617374e-10, "eval_signal/volume_coverage_20/centered_abs_mean": 1.316934451304578e-08, "eval_signal/volume_coverage_20/group_std_mean": 1.807888804402265e-08, "eval_signal/volume_coverage_20/group_zero_std_frac": 0.750000019868215, "eval_signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 1.3169344266715048e-09, "eval_signal/volume_coverage_20/weight": 0.10000000149011612, "eval_signal/volume_coverage_20/weighted_centered_abs_mean": 1.3169344266715048e-09, "eval_signal/volume_coverage_25/centered_abs_mean": 3.255522570041099e-08, "eval_signal/volume_coverage_25/group_std_mean": 4.442350100738087e-08, "eval_signal/volume_coverage_25/group_zero_std_frac": 0.6666666766007742, "eval_signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 3.255522502039939e-09, "eval_signal/volume_coverage_25/weight": 0.10000000149011612, "eval_signal/volume_coverage_25/weighted_centered_abs_mean": 3.255522502039939e-09, "eval_signal/volume_coverage_5/centered_abs_mean": 9.294568471355605e-10, "eval_signal/volume_coverage_5/group_std_mean": 1.2967965983453251e-09, "eval_signal/volume_coverage_5/group_zero_std_frac": 0.944444457689921, "eval_signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 9.294568789388243e-11, "eval_signal/volume_coverage_5/weight": 0.10000000149011612, "eval_signal/volume_coverage_5/weighted_centered_abs_mean": 9.294568789388243e-11, "eval_steps_per_second": 0.033, "step": 100 }, { "calibration/aurc": 0.31272345586761635, "calibration/batch_distribution_entropy": 0.9726450472289901, "calibration/buffer_distribution_entropy": 0.9188371377258736, "calibration/confidence_entropy": 0.5335367788600449, "calibration/coverage@0%": 0.02269584371857099, "calibration/coverage@1%": 0.02269584371857099, "calibration/coverage@10%": 0.1300949839302112, "calibration/coverage@15%": 0.19966343631116357, "calibration/coverage@20%": 0.2285751068867524, "calibration/coverage@25%": 0.29510949842114387, "calibration/coverage@30%": 0.4522562592221083, "calibration/coverage@5%": 0.08565880668153394, "calibration/ece": 0.15692051533719759, "calibration/mean_confidence": 0.5591746107834116, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012934027777777768, "completions/max_length": 3715.6, "completions/max_terminated_length": 3715.6, "completions/mean_length": 702.8796997070312, "completions/mean_terminated_length": 712.0661743164062, "completions/min_length": 0.0, "completions/min_terminated_length": 191.0, "epoch": 0.2519968500393745, "grad_norm": 0.00048026847071014345, "learning_rate": 3.1024096385542172e-06, "loss": -0.011, "num_tokens": 222233756.0, "reward": 0.9744926452636719, "reward_std": 0.1430598109960556, "rewards/accuracy_reward": 0.6703125, "rewards/brier_reward": 0.7576799392700195, "rewards/confidence_uniqueness_reward": 0.939336609840393, "rewards/format_reward": 0.9868923544883728, "rewards/frontier_aurc_reward": -0.0014964503003284334, "rewards/frontier_ece_reward": 0.006954653561115265, "rewards/frontier_entropy_batch_reward": -0.24488165378570556, "rewards/volume_coverage_0": -2.528729010908837e-10, "rewards/volume_coverage_1": -2.528729010908837e-10, "rewards/volume_coverage_10": -8.753370007996697e-10, "rewards/volume_coverage_15": -1.2441597280199445e-09, "rewards/volume_coverage_20": -1.8202452739535246e-09, "rewards/volume_coverage_25": -4.066257730528378e-09, "rewards/volume_coverage_5": -2.528729010908837e-10, "signal/accuracy_reward/centered_abs_mean": 0.1658420145511627, "signal/accuracy_reward/group_std_mean": 0.21986316740512848, "signal/accuracy_reward/group_zero_std_frac": 0.37777777910232546, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08292100727558135, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08292100727558135, "signal/advantage_abs_mean": 0.10654713213443756, "signal/advantage_pre_scale_abs_mean": 0.10654713213443756, "signal/advantage_pre_scale_std": 0.16769869327545167, "signal/advantage_std": 0.16769869327545167, "signal/brier_reward/centered_abs_mean": 0.16641111969947814, "signal/brier_reward/group_std_mean": 0.21107023060321808, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016641111671924592, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.016641111671924592, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03133438862860203, "signal/confidence_uniqueness_reward/group_std_mean": 0.05064094811677933, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003133438853546977, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003133438853546977, "signal/format_reward/centered_abs_mean": 0.02077365480363369, "signal/format_reward/group_std_mean": 0.038099339604377745, "signal/format_reward/group_zero_std_frac": 0.8444444417953492, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.010386827401816845, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.010386827401816845, "signal/frontier_aurc_reward/centered_abs_mean": 0.001442649750970304, "signal/frontier_aurc_reward/group_std_mean": 0.0023628756869584324, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.803312261472456e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.803312261472456e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.04892703369259834, "signal/frontier_ece_reward/group_std_mean": 0.06745585799217224, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0048927033320069315, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0048927033320069315, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.319078129529953, "signal/frontier_entropy_batch_reward/group_std_mean": 0.39101446866989137, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03190781399607658, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03190781399607658, "signal/volume_coverage_0/centered_abs_mean": 1.384534620285649e-09, "signal/volume_coverage_0/group_std_mean": 1.8525299072535972e-09, "signal/volume_coverage_0/group_zero_std_frac": 0.9361111164093018, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 1.3845346188978702e-10, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 1.3845346188978702e-10, "signal/volume_coverage_1/centered_abs_mean": 1.384534620285649e-09, "signal/volume_coverage_1/group_std_mean": 1.8525299072535972e-09, "signal/volume_coverage_1/group_zero_std_frac": 0.9361111164093018, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 1.3845346188978702e-10, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 1.3845346188978702e-10, "signal/volume_coverage_10/centered_abs_mean": 2.9901749964977853e-09, "signal/volume_coverage_10/group_std_mean": 4.043323897917617e-09, "signal/volume_coverage_10/group_zero_std_frac": 0.8861111164093017, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 2.9901749062921644e-10, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 2.9901749062921644e-10, "signal/volume_coverage_15/centered_abs_mean": 7.97301115129656e-09, "signal/volume_coverage_15/group_std_mean": 1.0545502338032975e-08, "signal/volume_coverage_15/group_zero_std_frac": 0.7861111044883728, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 7.973011320605572e-10, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 7.973011320605572e-10, "signal/volume_coverage_20/centered_abs_mean": 1.6036694550081164e-08, "signal/volume_coverage_20/group_std_mean": 2.1090238888632484e-08, "signal/volume_coverage_20/group_zero_std_frac": 0.7138888955116272, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 1.6036695527077426e-09, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 1.6036695527077426e-09, "signal/volume_coverage_25/centered_abs_mean": 6.605738356313395e-08, "signal/volume_coverage_25/group_std_mean": 8.634911559113334e-08, "signal/volume_coverage_25/group_zero_std_frac": 0.5916666924953461, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 6.60573808985987e-09, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 6.60573808985987e-09, "signal/volume_coverage_5/centered_abs_mean": 1.384534620285649e-09, "signal/volume_coverage_5/group_std_mean": 1.8525299072535972e-09, "signal/volume_coverage_5/group_zero_std_frac": 0.9361111164093018, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 1.3845346188978702e-10, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 1.3845346188978702e-10, "step": 105 }, { "calibration/aurc": 0.20659424723386605, "calibration/batch_distribution_entropy": 0.9763259743928365, "calibration/buffer_distribution_entropy": 0.9241441777845083, "calibration/confidence_entropy": 0.5103205997163243, "calibration/coverage@0%": 0.035508158669289906, "calibration/coverage@1%": 0.035508158669289906, "calibration/coverage@10%": 0.21171301071454668, "calibration/coverage@15%": 0.2847814132315914, "calibration/coverage@20%": 0.49886641517706865, "calibration/coverage@25%": 0.6624328588503937, "calibration/coverage@30%": 0.8575227068636678, "calibration/coverage@5%": 0.09811422916537346, "calibration/ece": 0.17949915830122162, "calibration/mean_confidence": 0.5596010988492515, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008593749999999978, "completions/max_length": 3468.4, "completions/max_terminated_length": 3468.4, "completions/mean_length": 720.1177124023437, "completions/mean_terminated_length": 726.3704467773438, "completions/min_length": 0.0, "completions/min_terminated_length": 211.6, "epoch": 0.2639967000412495, "grad_norm": 0.0004107690474484116, "learning_rate": 2.9518072289156627e-06, "loss": -0.0061, "num_tokens": 233637960.0, "reward": 0.9943800806999207, "reward_std": 0.13367998898029326, "rewards/accuracy_reward": 0.7108507037162781, "rewards/brier_reward": 0.7597374558448792, "rewards/confidence_uniqueness_reward": 0.9419220328330994, "rewards/format_reward": 0.9913194417953491, "rewards/frontier_aurc_reward": -0.0014706589048728348, "rewards/frontier_ece_reward": 0.0020732904551550744, "rewards/frontier_entropy_batch_reward": -0.27059880197048186, "rewards/volume_coverage_0": -2.5791634140903683e-10, "rewards/volume_coverage_1": -2.5791634140903683e-10, "rewards/volume_coverage_10": -4.4300731728841304e-10, "rewards/volume_coverage_15": -7.582023817814809e-10, "rewards/volume_coverage_20": -1.0568040151315295e-09, "rewards/volume_coverage_25": -4.2383120346656256e-09, "rewards/volume_coverage_5": -2.5791634140903683e-10, "signal/accuracy_reward/centered_abs_mean": 0.15238172709941863, "signal/accuracy_reward/group_std_mean": 0.20750262439250947, "signal/accuracy_reward/group_zero_std_frac": 0.3777777850627899, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07619086354970932, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07619086354970932, "signal/advantage_abs_mean": 0.09959981143474579, "signal/advantage_pre_scale_abs_mean": 0.09959981143474579, "signal/advantage_pre_scale_std": 0.15816081762313844, "signal/advantage_std": 0.15816081762313844, "signal/brier_reward/centered_abs_mean": 0.1581783950328827, "signal/brier_reward/group_std_mean": 0.20072446763515472, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015817839279770853, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.015817839279770853, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.026568013057112694, "signal/confidence_uniqueness_reward/group_std_mean": 0.04261137619614601, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002656801464036107, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002656801464036107, "signal/format_reward/centered_abs_mean": 0.01472439244389534, "signal/format_reward/group_std_mean": 0.02825543247163296, "signal/format_reward/group_zero_std_frac": 0.8805555582046509, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00736219622194767, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00736219622194767, "signal/frontier_aurc_reward/centered_abs_mean": 0.0014523042133077978, "signal/frontier_aurc_reward/group_std_mean": 0.002319393353536725, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.815380292100599e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.815380292100599e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.045850321650505066, "signal/frontier_ece_reward/group_std_mean": 0.06342493891716003, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0045850323513150215, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0045850323513150215, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33311462998390196, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4045804440975189, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03331146351993084, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03331146351993084, "signal/volume_coverage_0/centered_abs_mean": 4.972203385134044e-10, "signal/volume_coverage_0/group_std_mean": 6.490914430945516e-10, "signal/volume_coverage_0/group_zero_std_frac": 0.9861111164093017, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 4.9722035603411153e-11, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 4.9722035603411153e-11, "signal/volume_coverage_1/centered_abs_mean": 4.972203385134044e-10, "signal/volume_coverage_1/group_std_mean": 6.490914430945516e-10, "signal/volume_coverage_1/group_zero_std_frac": 0.9861111164093017, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 4.9722035603411153e-11, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 4.9722035603411153e-11, "signal/volume_coverage_10/centered_abs_mean": 7.563527429366168e-10, "signal/volume_coverage_10/group_std_mean": 9.919285573478653e-10, "signal/volume_coverage_10/group_zero_std_frac": 0.9722222208976745, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 7.563527937640146e-11, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 7.563527937640146e-11, "signal/volume_coverage_15/centered_abs_mean": 1.5424102620120728e-09, "signal/volume_coverage_15/group_std_mean": 2.014626616919646e-09, "signal/volume_coverage_15/group_zero_std_frac": 0.9277777791023254, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 1.5424102906350102e-10, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 1.5424102906350102e-10, "signal/volume_coverage_20/centered_abs_mean": 3.1644094095062413e-09, "signal/volume_coverage_20/group_std_mean": 4.138002096532034e-09, "signal/volume_coverage_20/group_zero_std_frac": 0.8527777910232544, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 3.1644093812302487e-10, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 3.1644093812302487e-10, "signal/volume_coverage_25/centered_abs_mean": 1.765938459907801e-08, "signal/volume_coverage_25/group_std_mean": 2.32815502876349e-08, "signal/volume_coverage_25/group_zero_std_frac": 0.75, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 1.765938506537168e-09, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 1.765938506537168e-09, "signal/volume_coverage_5/centered_abs_mean": 4.972203385134044e-10, "signal/volume_coverage_5/group_std_mean": 6.490914430945516e-10, "signal/volume_coverage_5/group_zero_std_frac": 0.9861111164093017, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 4.9722035603411153e-11, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 4.9722035603411153e-11, "step": 110 }, { "calibration/aurc": 0.32860315877388296, "calibration/batch_distribution_entropy": 0.977465822883542, "calibration/buffer_distribution_entropy": 0.929192606743824, "calibration/confidence_entropy": 0.5185033600393403, "calibration/coverage@0%": 0.008987521330787587, "calibration/coverage@1%": 0.008987521330787587, "calibration/coverage@10%": 0.07148127660801809, "calibration/coverage@15%": 0.12962492282248475, "calibration/coverage@20%": 0.25861813025028974, "calibration/coverage@25%": 0.47630181669848853, "calibration/coverage@30%": 0.5781705600900647, "calibration/coverage@5%": 0.0321454160676297, "calibration/ece": 0.19758046540227037, "calibration/mean_confidence": 0.5206799510790493, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014236111111111093, "completions/max_length": 3348.8, "completions/max_terminated_length": 3348.8, "completions/mean_length": 720.4413208007812, "completions/mean_terminated_length": 730.9393432617187, "completions/min_length": 0.0, "completions/min_terminated_length": 194.8, "epoch": 0.27599655004312446, "grad_norm": 0.0003260687808506191, "learning_rate": 2.8012048192771087e-06, "loss": -0.0109, "num_tokens": 245016644.0, "reward": 0.9704742431640625, "reward_std": 0.1369811251759529, "rewards/accuracy_reward": 0.6642361164093018, "rewards/brier_reward": 0.7601930141448975, "rewards/confidence_uniqueness_reward": 0.9377529859542847, "rewards/format_reward": 0.9857638835906982, "rewards/frontier_aurc_reward": -0.0015619280282407999, "rewards/frontier_ece_reward": 0.007641966454684734, "rewards/frontier_entropy_batch_reward": -0.2506504714488983, "rewards/volume_coverage_0": 1.3546747751336242e-11, "rewards/volume_coverage_1": 1.3546747751336242e-11, "rewards/volume_coverage_10": -3.578200222231764e-10, "rewards/volume_coverage_15": -9.904014358630419e-10, "rewards/volume_coverage_20": -2.8937101603787595e-10, "rewards/volume_coverage_25": 8.38249629986354e-09, "rewards/volume_coverage_5": 1.3546747751336242e-11, "signal/accuracy_reward/centered_abs_mean": 0.15470920205116273, "signal/accuracy_reward/group_std_mean": 0.20263281166553498, "signal/accuracy_reward/group_zero_std_frac": 0.425, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07735460102558137, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07735460102558137, "signal/advantage_abs_mean": 0.10298931151628495, "signal/advantage_pre_scale_abs_mean": 0.10298931151628495, "signal/advantage_pre_scale_std": 0.16549083590507507, "signal/advantage_std": 0.16549083590507507, "signal/brier_reward/centered_abs_mean": 0.15902409851551055, "signal/brier_reward/group_std_mean": 0.20213948488235473, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015902410633862017, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.015902410633862017, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03237666189670563, "signal/confidence_uniqueness_reward/group_std_mean": 0.05298796966671944, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003237666329368949, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003237666329368949, "signal/format_reward/centered_abs_mean": 0.02151692695915699, "signal/format_reward/group_std_mean": 0.04006081186234951, "signal/format_reward/group_zero_std_frac": 0.8361111283302307, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.010758463479578494, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.010758463479578494, "signal/frontier_aurc_reward/centered_abs_mean": 0.0014225503196939825, "signal/frontier_aurc_reward/group_std_mean": 0.0022425684612244366, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.77818792508333e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.77818792508333e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.04533173516392708, "signal/frontier_ece_reward/group_std_mean": 0.06232137307524681, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004533173609524965, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004533173609524965, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3208661198616028, "signal/frontier_entropy_batch_reward/group_std_mean": 0.39007292985916137, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.032086612284183504, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032086612284183504, "signal/volume_coverage_0/centered_abs_mean": 1.1528306809172051e-09, "signal/volume_coverage_0/group_std_mean": 1.4859115216037111e-09, "signal/volume_coverage_0/group_zero_std_frac": 0.9666666626930237, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 1.1528306122221555e-10, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 1.1528306122221555e-10, "signal/volume_coverage_1/centered_abs_mean": 1.1528306809172051e-09, "signal/volume_coverage_1/group_std_mean": 1.4859115216037111e-09, "signal/volume_coverage_1/group_zero_std_frac": 0.9666666626930237, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 1.1528306122221555e-10, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 1.1528306122221555e-10, "signal/volume_coverage_10/centered_abs_mean": 2.885503325167349e-09, "signal/volume_coverage_10/group_std_mean": 3.783502866561151e-09, "signal/volume_coverage_10/group_zero_std_frac": 0.9, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 2.885503572191972e-10, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 2.885503572191972e-10, "signal/volume_coverage_15/centered_abs_mean": 5.144241055354826e-09, "signal/volume_coverage_15/group_std_mean": 6.739672198108337e-09, "signal/volume_coverage_15/group_zero_std_frac": 0.8416666626930237, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 5.144241116417092e-10, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 5.144241116417092e-10, "signal/volume_coverage_20/centered_abs_mean": 1.0344034517828504e-08, "signal/volume_coverage_20/group_std_mean": 1.3228679662802278e-08, "signal/volume_coverage_20/group_zero_std_frac": 0.7916666507720947, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 1.0344034573339656e-09, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 1.0344034573339656e-09, "signal/volume_coverage_25/centered_abs_mean": 3.0012533969170366e-08, "signal/volume_coverage_25/group_std_mean": 3.899686404906788e-08, "signal/volume_coverage_25/group_zero_std_frac": 0.6666666865348816, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 3.0012535123802307e-09, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 3.0012535123802307e-09, "signal/volume_coverage_5/centered_abs_mean": 1.1528306809172051e-09, "signal/volume_coverage_5/group_std_mean": 1.4859115216037111e-09, "signal/volume_coverage_5/group_zero_std_frac": 0.9666666626930237, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 1.1528306122221555e-10, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 1.1528306122221555e-10, "step": 115 }, { "calibration/aurc": 0.33500062539729397, "calibration/batch_distribution_entropy": 0.956171359928593, "calibration/buffer_distribution_entropy": 0.9341393129252076, "calibration/confidence_entropy": 0.5175410523940116, "calibration/coverage@0%": 0.007331317850762675, "calibration/coverage@1%": 0.007331317850762675, "calibration/coverage@10%": 0.02520131922495534, "calibration/coverage@15%": 0.23602583482204204, "calibration/coverage@20%": 0.42010581283495946, "calibration/coverage@25%": 0.5450955063899958, "calibration/coverage@30%": 0.5738903394255874, "calibration/coverage@5%": 0.007331317850762675, "calibration/ece": 0.17458800836962912, "calibration/mean_confidence": 0.5967470814550844, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009982638888888885, "completions/max_length": 3088.2, "completions/max_terminated_length": 3088.2, "completions/mean_length": 718.2924560546875, "completions/mean_terminated_length": 725.5123291015625, "completions/min_length": 0.0, "completions/min_terminated_length": 247.0, "epoch": 0.28799640004499943, "grad_norm": 0.0002902206324506551, "learning_rate": 2.6506024096385547e-06, "loss": -0.0084, "num_tokens": 256373229.0, "reward": 0.9828748226165771, "reward_std": 0.13469320088624953, "rewards/accuracy_reward": 0.686024296283722, "rewards/brier_reward": 0.7709903001785279, "rewards/confidence_uniqueness_reward": 0.940601646900177, "rewards/format_reward": 0.9899305582046509, "rewards/frontier_aurc_reward": -0.0016850390122272075, "rewards/frontier_ece_reward": 0.0074638242833316324, "rewards/frontier_entropy_batch_reward": -0.2698711782693863, "rewards/volume_coverage_0": -1.165113189954825e-10, "rewards/volume_coverage_1": -1.165113189954825e-10, "rewards/volume_coverage_10": -3.5378330993929286e-10, "rewards/volume_coverage_15": -3.960055356549974e-10, "rewards/volume_coverage_20": -5.185866753382173e-10, "rewards/volume_coverage_25": -1.3467162430363944e-09, "rewards/volume_coverage_5": -4.011809402043598e-10, "signal/accuracy_reward/centered_abs_mean": 0.15410698801279069, "signal/accuracy_reward/group_std_mean": 0.20190810561180114, "signal/accuracy_reward/group_zero_std_frac": 0.425, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07705349400639534, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07705349400639534, "signal/advantage_abs_mean": 0.10197492688894272, "signal/advantage_pre_scale_abs_mean": 0.10197492688894272, "signal/advantage_pre_scale_std": 0.16204161643981935, "signal/advantage_std": 0.16204161643981935, "signal/brier_reward/centered_abs_mean": 0.1546614795923233, "signal/brier_reward/group_std_mean": 0.19821046888828278, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015466148406267166, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.015466148406267166, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02829754091799259, "signal/confidence_uniqueness_reward/group_std_mean": 0.04569016918540001, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0028297540731728075, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0028297540731728075, "signal/format_reward/centered_abs_mean": 0.01647135429084301, "signal/format_reward/group_std_mean": 0.03150532059371471, "signal/format_reward/group_zero_std_frac": 0.8666666746139526, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.008235677145421505, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.008235677145421505, "signal/frontier_aurc_reward/centered_abs_mean": 0.0016746392473578453, "signal/frontier_aurc_reward/group_std_mean": 0.002678707940503955, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.0932990810251795e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.0932990810251795e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.0423102580010891, "signal/frontier_ece_reward/group_std_mean": 0.05887846276164055, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004231025744229555, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004231025744229555, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33146599531173704, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4032399892807007, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03314659893512726, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03314659893512726, "signal/volume_coverage_0/centered_abs_mean": 1.1543410685654897e-09, "signal/volume_coverage_0/group_std_mean": 1.4904493889078552e-09, "signal/volume_coverage_0/group_zero_std_frac": 0.9472222208976746, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 1.1543411222551813e-10, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 1.1543411222551813e-10, "signal/volume_coverage_1/centered_abs_mean": 1.1543410685654897e-09, "signal/volume_coverage_1/group_std_mean": 1.4904493889078552e-09, "signal/volume_coverage_1/group_zero_std_frac": 0.9472222208976746, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 1.1543411222551813e-10, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 1.1543411222551813e-10, "signal/volume_coverage_10/centered_abs_mean": 3.826916784854806e-09, "signal/volume_coverage_10/group_std_mean": 4.964569447207801e-09, "signal/volume_coverage_10/group_zero_std_frac": 0.9361111044883728, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 3.8269166720110437e-10, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 3.8269166720110437e-10, "signal/volume_coverage_15/centered_abs_mean": 4.632062246268109e-09, "signal/volume_coverage_15/group_std_mean": 6.021744061723844e-09, "signal/volume_coverage_15/group_zero_std_frac": 0.9305555582046509, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 4.632062288855571e-10, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 4.632062288855571e-10, "signal/volume_coverage_20/centered_abs_mean": 8.57383582464344e-09, "signal/volume_coverage_20/group_std_mean": 1.1109397090452999e-08, "signal/volume_coverage_20/group_zero_std_frac": 0.8388888835906982, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 8.573836485399611e-10, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 8.573836485399611e-10, "signal/volume_coverage_25/centered_abs_mean": 2.939619259678672e-08, "signal/volume_coverage_25/group_std_mean": 3.8684455194032405e-08, "signal/volume_coverage_25/group_zero_std_frac": 0.8027777791023254, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 2.9396191353510404e-09, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 2.9396191353510404e-09, "signal/volume_coverage_5/centered_abs_mean": 2.215817353212746e-09, "signal/volume_coverage_5/group_std_mean": 2.8131893509142357e-09, "signal/volume_coverage_5/group_zero_std_frac": 0.9444444417953491, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 2.215817306982365e-10, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 2.215817306982365e-10, "step": 120 }, { "calibration/aurc": 0.1915708186268675, "calibration/batch_distribution_entropy": 0.9721579261742956, "calibration/buffer_distribution_entropy": 0.9370939672508353, "calibration/confidence_entropy": 0.5153446337077295, "calibration/coverage@0%": 0.031854188571992154, "calibration/coverage@1%": 0.031854188571992154, "calibration/coverage@10%": 0.24795650273649508, "calibration/coverage@15%": 0.3973872063858091, "calibration/coverage@20%": 0.5275370559863992, "calibration/coverage@25%": 0.7552314671774273, "calibration/coverage@30%": 0.8777631923220005, "calibration/coverage@5%": 0.16669963012195738, "calibration/ece": 0.1816904653658499, "calibration/mean_confidence": 0.5460239694308244, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008854166666666673, "completions/max_length": 3260.4, "completions/max_terminated_length": 3260.4, "completions/mean_length": 735.0815063476563, "completions/mean_terminated_length": 741.7195556640625, "completions/min_length": 0.0, "completions/min_terminated_length": 224.6, "epoch": 0.2999962500468744, "grad_norm": 0.00040568591793999076, "learning_rate": 2.5e-06, "loss": -0.0081, "num_tokens": 267959032.0, "reward": 0.9793904066085816, "reward_std": 0.13277052342891693, "rewards/accuracy_reward": 0.6761284828186035, "rewards/brier_reward": 0.7679849863052368, "rewards/confidence_uniqueness_reward": 0.9425747990608215, "rewards/format_reward": 0.9911458253860473, "rewards/frontier_aurc_reward": -0.0013498676475137473, "rewards/frontier_ece_reward": 0.006347721349447966, "rewards/frontier_entropy_batch_reward": -0.259206211566925, "rewards/volume_coverage_0": -8.512854388423308e-11, "rewards/volume_coverage_1": -8.512854388423308e-11, "rewards/volume_coverage_10": -8.512854388423308e-11, "rewards/volume_coverage_15": -2.287901209631349e-10, "rewards/volume_coverage_20": -6.88414039662355e-10, "rewards/volume_coverage_25": -7.95366701389355e-09, "rewards/volume_coverage_5": -8.512854388423308e-11, "signal/accuracy_reward/centered_abs_mean": 0.155517578125, "signal/accuracy_reward/group_std_mean": 0.2059672147035599, "signal/accuracy_reward/group_zero_std_frac": 0.4138888895511627, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0777587890625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0777587890625, "signal/advantage_abs_mean": 0.10053557455539704, "signal/advantage_pre_scale_abs_mean": 0.10053557455539704, "signal/advantage_pre_scale_std": 0.15793273150920867, "signal/advantage_std": 0.15793273150920867, "signal/brier_reward/centered_abs_mean": 0.15555652379989623, "signal/brier_reward/group_std_mean": 0.1986626386642456, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015555652230978012, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.015555652230978012, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.026127389818429946, "signal/confidence_uniqueness_reward/group_std_mean": 0.04029006510972977, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002612739009782672, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002612739009782672, "signal/format_reward/centered_abs_mean": 0.01452907994389534, "signal/format_reward/group_std_mean": 0.026035796478390695, "signal/format_reward/group_zero_std_frac": 0.8972222328186035, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00726453997194767, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00726453997194767, "signal/frontier_aurc_reward/centered_abs_mean": 0.0013476456748321652, "signal/frontier_aurc_reward/group_std_mean": 0.002196387154981494, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.68455708262627e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.68455708262627e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.043793865293264386, "signal/frontier_ece_reward/group_std_mean": 0.05965333953499794, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0043793866410851475, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0043793866410851475, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32110402584075926, "signal/frontier_entropy_batch_reward/group_std_mean": 0.39466953873634336, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03211040273308754, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03211040273308754, "signal/volume_coverage_0/centered_abs_mean": 2.513125332836985e-10, "signal/volume_coverage_0/group_std_mean": 3.27953761369848e-10, "signal/volume_coverage_0/group_zero_std_frac": 1.0, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 2.5131253675314547e-11, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 2.5131253675314547e-11, "signal/volume_coverage_1/centered_abs_mean": 2.513125332836985e-10, "signal/volume_coverage_1/group_std_mean": 3.27953761369848e-10, "signal/volume_coverage_1/group_zero_std_frac": 1.0, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 2.5131253675314547e-11, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 2.5131253675314547e-11, "signal/volume_coverage_10/centered_abs_mean": 2.513125332836985e-10, "signal/volume_coverage_10/group_std_mean": 3.27953761369848e-10, "signal/volume_coverage_10/group_zero_std_frac": 1.0, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 2.5131253675314547e-11, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 2.5131253675314547e-11, "signal/volume_coverage_15/centered_abs_mean": 6.385222087179621e-10, "signal/volume_coverage_15/group_std_mean": 8.169314752448642e-10, "signal/volume_coverage_15/group_zero_std_frac": 0.9833333373069764, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 6.385222038607363e-11, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 6.385222038607363e-11, "signal/volume_coverage_20/centered_abs_mean": 5.783205347942299e-09, "signal/volume_coverage_20/group_std_mean": 7.703742019304904e-09, "signal/volume_coverage_20/group_zero_std_frac": 0.8611111044883728, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 5.783205683437819e-10, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 5.783205683437819e-10, "signal/volume_coverage_25/centered_abs_mean": 3.0315699994787335e-08, "signal/volume_coverage_25/group_std_mean": 4.013403822611394e-08, "signal/volume_coverage_25/group_zero_std_frac": 0.7388888835906983, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 3.0315701338157197e-09, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 3.0315701338157197e-09, "signal/volume_coverage_5/centered_abs_mean": 2.513125332836985e-10, "signal/volume_coverage_5/group_std_mean": 3.27953761369848e-10, "signal/volume_coverage_5/group_zero_std_frac": 1.0, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 2.5131253675314547e-11, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 2.5131253675314547e-11, "step": 125 }, { "calibration/aurc": 0.2339408439391594, "calibration/batch_distribution_entropy": 0.961508234381926, "calibration/buffer_distribution_entropy": 0.9402565004280987, "calibration/confidence_entropy": 0.4876715529200112, "calibration/coverage@0%": 0.026255400448416892, "calibration/coverage@1%": 0.026255400448416892, "calibration/coverage@10%": 0.17620168770352224, "calibration/coverage@15%": 0.30992770755313526, "calibration/coverage@20%": 0.4616251066347946, "calibration/coverage@25%": 0.5897109719923753, "calibration/coverage@30%": 0.6721060609134154, "calibration/coverage@5%": 0.08652022772895099, "calibration/ece": 0.11093919101297997, "calibration/mean_confidence": 0.5869900882516286, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013802083333333348, "completions/max_length": 3537.0, "completions/max_terminated_length": 3537.0, "completions/mean_length": 775.1760620117187, "completions/mean_terminated_length": 786.0658081054687, "completions/min_length": 0.0, "completions/min_terminated_length": 215.4, "epoch": 0.3119961000487494, "grad_norm": 0.0003663992101792246, "learning_rate": 2.349397590361446e-06, "loss": -0.0105, "num_tokens": 280013860.0, "reward": 0.9698229193687439, "reward_std": 0.14345374405384065, "rewards/accuracy_reward": 0.6619791626930237, "rewards/brier_reward": 0.7658138871192932, "rewards/confidence_uniqueness_reward": 0.9374646186828614, "rewards/format_reward": 0.9858506798744202, "rewards/frontier_aurc_reward": -0.0014807431260123848, "rewards/frontier_ece_reward": 0.008980327052995563, "rewards/frontier_entropy_batch_reward": -0.2529941201210022, "rewards/volume_coverage_0": -1.795900022444341e-10, "rewards/volume_coverage_1": -1.795900022444341e-10, "rewards/volume_coverage_10": 3.8563873101804536e-10, "rewards/volume_coverage_15": 7.631051734957594e-10, "rewards/volume_coverage_20": 2.1685917506175656e-09, "rewards/volume_coverage_25": 3.7465909930034515e-09, "rewards/volume_coverage_5": -1.795900022444341e-10, "signal/accuracy_reward/centered_abs_mean": 0.17249349057674407, "signal/accuracy_reward/group_std_mean": 0.223880273103714, "signal/accuracy_reward/group_zero_std_frac": 0.3805555641651154, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08624674528837203, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08624674528837203, "signal/advantage_abs_mean": 0.10982311069965363, "signal/advantage_pre_scale_abs_mean": 0.10982311069965363, "signal/advantage_pre_scale_std": 0.16914137005805968, "signal/advantage_std": 0.16914137005805968, "signal/brier_reward/centered_abs_mean": 0.1637795925140381, "signal/brier_reward/group_std_mean": 0.20636882185935973, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01637795865535736, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01637795865535736, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03288912586867809, "signal/confidence_uniqueness_reward/group_std_mean": 0.051013688743114474, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003288912633433938, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003288912633433938, "signal/format_reward/centered_abs_mean": 0.02181532122194767, "signal/format_reward/group_std_mean": 0.037739118188619615, "signal/format_reward/group_zero_std_frac": 0.8499999880790711, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.010907660610973835, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.010907660610973835, "signal/frontier_aurc_reward/centered_abs_mean": 0.0015587236033752561, "signal/frontier_aurc_reward/group_std_mean": 0.0024250032845884563, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.9484045697026887e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.9484045697026887e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.04549378007650375, "signal/frontier_ece_reward/group_std_mean": 0.061597873270511624, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004549377970397473, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004549377970397473, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.314960116147995, "signal/frontier_entropy_batch_reward/group_std_mean": 0.38633153438568113, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0314960103482008, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0314960103482008, "signal/volume_coverage_0/centered_abs_mean": 8.251522298774461e-10, "signal/volume_coverage_0/group_std_mean": 1.1054033466351142e-09, "signal/volume_coverage_0/group_zero_std_frac": 0.9638888955116272, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 8.251521826929675e-11, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 8.251521826929675e-11, "signal/volume_coverage_1/centered_abs_mean": 8.251522298774461e-10, "signal/volume_coverage_1/group_std_mean": 1.1054033466351142e-09, "signal/volume_coverage_1/group_zero_std_frac": 0.9638888955116272, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 8.251521826929675e-11, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 8.251521826929675e-11, "signal/volume_coverage_10/centered_abs_mean": 2.2939792010401305e-09, "signal/volume_coverage_10/group_std_mean": 3.0097725312039357e-09, "signal/volume_coverage_10/group_zero_std_frac": 0.9138888835906982, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 2.2939793103277096e-10, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 2.2939793103277096e-10, "signal/volume_coverage_15/centered_abs_mean": 3.3565182355754787e-09, "signal/volume_coverage_15/group_std_mean": 4.396137531781364e-09, "signal/volume_coverage_15/group_zero_std_frac": 0.8833333253860474, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 3.356518269923003e-10, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 3.356518269923003e-10, "signal/volume_coverage_20/centered_abs_mean": 2.0850407000949645e-08, "signal/volume_coverage_20/group_std_mean": 2.83504333786766e-08, "signal/volume_coverage_20/group_zero_std_frac": 0.7777778029441833, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 2.0850406567962663e-09, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 2.0850406567962663e-09, "signal/volume_coverage_25/centered_abs_mean": 4.885320112890668e-08, "signal/volume_coverage_25/group_std_mean": 6.617217529480968e-08, "signal/volume_coverage_25/group_zero_std_frac": 0.6944444656372071, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 4.885319837555358e-09, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 4.885319837555358e-09, "signal/volume_coverage_5/centered_abs_mean": 8.251522298774461e-10, "signal/volume_coverage_5/group_std_mean": 1.1054033466351142e-09, "signal/volume_coverage_5/group_zero_std_frac": 0.9638888955116272, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 8.251521826929675e-11, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 8.251521826929675e-11, "step": 130 }, { "calibration/aurc": 0.21908669961435798, "calibration/batch_distribution_entropy": 0.9311462546505785, "calibration/buffer_distribution_entropy": 0.9416415589799986, "calibration/confidence_entropy": 0.4657020648208376, "calibration/coverage@0%": 0.03239955997814936, "calibration/coverage@1%": 0.03239955997814936, "calibration/coverage@10%": 0.31323954224765294, "calibration/coverage@15%": 0.36411866007444027, "calibration/coverage@20%": 0.4666335057409091, "calibration/coverage@25%": 0.6104226811960738, "calibration/coverage@30%": 0.7054088223168888, "calibration/coverage@5%": 0.21509016281502874, "calibration/ece": 0.17209419891679043, "calibration/mean_confidence": 0.6375476748236272, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011024305555555558, "completions/max_length": 3435.4, "completions/max_terminated_length": 3435.4, "completions/mean_length": 770.680126953125, "completions/mean_terminated_length": 779.2688720703125, "completions/min_length": 0.0, "completions/min_terminated_length": 214.0, "epoch": 0.32399595005062437, "grad_norm": 0.000423381949076429, "learning_rate": 2.1987951807228917e-06, "loss": -0.0083, "num_tokens": 291985119.0, "reward": 0.9828216791152954, "reward_std": 0.13931744694709777, "rewards/accuracy_reward": 0.6855034589767456, "rewards/brier_reward": 0.775595772266388, "rewards/confidence_uniqueness_reward": 0.9389704704284668, "rewards/format_reward": 0.9888888955116272, "rewards/frontier_aurc_reward": -0.0015716422349214554, "rewards/frontier_ece_reward": 0.008903909381479025, "rewards/frontier_entropy_batch_reward": -0.2670187473297119, "rewards/volume_coverage_0": -1.3643474848154536e-11, "rewards/volume_coverage_1": -1.3643474848154536e-11, "rewards/volume_coverage_10": -3.4253438757278486e-11, "rewards/volume_coverage_15": -3.859347159126253e-10, "rewards/volume_coverage_20": 7.330478162849429e-10, "rewards/volume_coverage_25": 1.211570410664109e-09, "rewards/volume_coverage_5": -1.3643474848154536e-11, "signal/accuracy_reward/centered_abs_mean": 0.16392686516046523, "signal/accuracy_reward/group_std_mean": 0.21705200970172883, "signal/accuracy_reward/group_zero_std_frac": 0.38333333730697633, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08196343258023261, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08196343258023261, "signal/advantage_abs_mean": 0.10422161519527436, "signal/advantage_pre_scale_abs_mean": 0.10422161519527436, "signal/advantage_pre_scale_std": 0.1652237981557846, "signal/advantage_std": 0.1652237981557846, "signal/brier_reward/centered_abs_mean": 0.1610366255044937, "signal/brier_reward/group_std_mean": 0.20538146793842316, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016103663109242917, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.016103663109242917, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.030462343245744705, "signal/confidence_uniqueness_reward/group_std_mean": 0.048246100544929504, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0030462343711405993, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0030462343711405993, "signal/format_reward/centered_abs_mean": 0.01842447929084301, "signal/format_reward/group_std_mean": 0.03367025405168533, "signal/format_reward/group_zero_std_frac": 0.8666666746139526, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.009212239645421505, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.009212239645421505, "signal/frontier_aurc_reward/centered_abs_mean": 0.0018255119677633048, "signal/frontier_aurc_reward/group_std_mean": 0.002912291418761015, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.2818900833954104e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.2818900833954104e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.04268218874931336, "signal/frontier_ece_reward/group_std_mean": 0.05850542336702347, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004268218902871013, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004268218902871013, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31653188467025756, "signal/frontier_entropy_batch_reward/group_std_mean": 0.38656463027000426, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03165318816900253, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03165318816900253, "signal/volume_coverage_0/centered_abs_mean": 4.996748605157197e-10, "signal/volume_coverage_0/group_std_mean": 6.554245639278644e-10, "signal/volume_coverage_0/group_zero_std_frac": 0.9944444417953491, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 4.996748501073789e-11, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 4.996748501073789e-11, "signal/volume_coverage_1/centered_abs_mean": 4.996748605157197e-10, "signal/volume_coverage_1/group_std_mean": 6.554245639278644e-10, "signal/volume_coverage_1/group_zero_std_frac": 0.9944444417953491, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 4.996748501073789e-11, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 4.996748501073789e-11, "signal/volume_coverage_10/centered_abs_mean": 1.0024358822191547e-09, "signal/volume_coverage_10/group_std_mean": 1.3108416178908567e-09, "signal/volume_coverage_10/group_zero_std_frac": 0.9666666746139526, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 1.002435957159209e-10, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 1.002435957159209e-10, "signal/volume_coverage_15/centered_abs_mean": 2.1058243517002584e-09, "signal/volume_coverage_15/group_std_mean": 2.7569074960431817e-09, "signal/volume_coverage_15/group_zero_std_frac": 0.925000011920929, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 2.1058243517002584e-10, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 2.1058243517002584e-10, "signal/volume_coverage_20/centered_abs_mean": 7.328173268206228e-09, "signal/volume_coverage_20/group_std_mean": 9.528951983028833e-09, "signal/volume_coverage_20/group_zero_std_frac": 0.8055555582046509, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 7.328173290410689e-10, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 7.328173290410689e-10, "signal/volume_coverage_25/centered_abs_mean": 1.6980138717315187e-08, "signal/volume_coverage_25/group_std_mean": 2.177776963208089e-08, "signal/volume_coverage_25/group_zero_std_frac": 0.7194444537162781, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 1.6980139905253821e-09, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 1.6980139905253821e-09, "signal/volume_coverage_5/centered_abs_mean": 4.996748605157197e-10, "signal/volume_coverage_5/group_std_mean": 6.554245639278644e-10, "signal/volume_coverage_5/group_zero_std_frac": 0.9944444417953491, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 4.996748501073789e-11, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 4.996748501073789e-11, "step": 135 }, { "calibration/aurc": 0.18688488282444396, "calibration/batch_distribution_entropy": 0.9612602498585673, "calibration/buffer_distribution_entropy": 0.946935134946479, "calibration/confidence_entropy": 0.4878221424685513, "calibration/coverage@0%": 0.010068253342126169, "calibration/coverage@1%": 0.010068253342126169, "calibration/coverage@10%": 0.15910374872918792, "calibration/coverage@15%": 0.449369630860614, "calibration/coverage@20%": 0.559990037943999, "calibration/coverage@25%": 0.8223586188910149, "calibration/coverage@30%": 0.9565088720029576, "calibration/coverage@5%": 0.09463446205940726, "calibration/ece": 0.16027606652347887, "calibration/mean_confidence": 0.588729914691588, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014236111111111116, "completions/max_length": 3632.4, "completions/max_terminated_length": 3632.4, "completions/mean_length": 776.3033935546875, "completions/mean_terminated_length": 787.4848510742188, "completions/min_length": 0.0, "completions/min_terminated_length": 253.0, "epoch": 0.33599580005249935, "grad_norm": 0.00032712117535993457, "learning_rate": 2.0481927710843377e-06, "loss": -0.0098, "num_tokens": 304032358.0, "reward": 0.9728388667106629, "reward_std": 0.13903119266033173, "rewards/accuracy_reward": 0.6722222208976746, "rewards/brier_reward": 0.7670191049575805, "rewards/confidence_uniqueness_reward": 0.9365373611450195, "rewards/format_reward": 0.9854166746139527, "rewards/frontier_aurc_reward": -0.001542026223614812, "rewards/frontier_ece_reward": 0.0071903283707797526, "rewards/frontier_entropy_batch_reward": -0.2703594654798508, "rewards/volume_coverage_0": -5.332067799443396e-11, "rewards/volume_coverage_1": -5.332067799443396e-11, "rewards/volume_coverage_10": -1.9090122772499884e-10, "rewards/volume_coverage_15": -4.564347600398422e-10, "rewards/volume_coverage_20": -6.339565656286739e-11, "rewards/volume_coverage_25": -2.0095051789237318e-09, "rewards/volume_coverage_5": -1.506728820703518e-10, "signal/accuracy_reward/centered_abs_mean": 0.15435112714767457, "signal/accuracy_reward/group_std_mean": 0.20686088502407074, "signal/accuracy_reward/group_zero_std_frac": 0.4000000059604645, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07717556357383729, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07717556357383729, "signal/advantage_abs_mean": 0.10415131747722625, "signal/advantage_pre_scale_abs_mean": 0.10415131747722625, "signal/advantage_pre_scale_std": 0.16518069207668304, "signal/advantage_std": 0.16518069207668304, "signal/brier_reward/centered_abs_mean": 0.15760179460048676, "signal/brier_reward/group_std_mean": 0.2008292406797409, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015760179981589316, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.015760179981589316, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.033130045235157016, "signal/confidence_uniqueness_reward/group_std_mean": 0.05183272436261177, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0033130045514553784, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0033130045514553784, "signal/format_reward/centered_abs_mean": 0.021896701864898206, "signal/format_reward/group_std_mean": 0.038436245545744895, "signal/format_reward/group_zero_std_frac": 0.850000011920929, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.010948350932449103, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.010948350932449103, "signal/frontier_aurc_reward/centered_abs_mean": 0.0016865363577380776, "signal/frontier_aurc_reward/group_std_mean": 0.0028139258734881877, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.108170556311961e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.108170556311961e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.040233349055051805, "signal/frontier_ece_reward/group_std_mean": 0.05556822866201401, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004023334989324212, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004023334989324212, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33216995000839233, "signal/frontier_entropy_batch_reward/group_std_mean": 0.40116575360298157, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03321699649095535, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03321699649095535, "signal/volume_coverage_0/centered_abs_mean": 5.590514454145712e-10, "signal/volume_coverage_0/group_std_mean": 7.51527159253973e-10, "signal/volume_coverage_0/group_zero_std_frac": 0.9833333373069764, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 5.590514960684967e-11, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 5.590514960684967e-11, "signal/volume_coverage_1/centered_abs_mean": 5.590514454145712e-10, "signal/volume_coverage_1/group_std_mean": 7.51527159253973e-10, "signal/volume_coverage_1/group_zero_std_frac": 0.9833333373069764, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 5.590514960684967e-11, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 5.590514960684967e-11, "signal/volume_coverage_10/centered_abs_mean": 1.7093801332745962e-09, "signal/volume_coverage_10/group_std_mean": 2.306350740249874e-09, "signal/volume_coverage_10/group_zero_std_frac": 0.9416666626930237, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 1.7093802172352123e-10, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 1.7093802172352123e-10, "signal/volume_coverage_15/centered_abs_mean": 3.268383494403615e-09, "signal/volume_coverage_15/group_std_mean": 4.34988126907232e-09, "signal/volume_coverage_15/group_zero_std_frac": 0.9111111044883728, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 3.2683836219057906e-10, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 3.2683836219057906e-10, "signal/volume_coverage_20/centered_abs_mean": 7.0756116826720115e-09, "signal/volume_coverage_20/group_std_mean": 9.405933146244828e-09, "signal/volume_coverage_20/group_zero_std_frac": 0.8527777910232544, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 7.075611883726462e-10, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 7.075611883726462e-10, "signal/volume_coverage_25/centered_abs_mean": 3.317895354015832e-08, "signal/volume_coverage_25/group_std_mean": 4.440126604166039e-08, "signal/volume_coverage_25/group_zero_std_frac": 0.7277777791023254, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 3.317895497234602e-09, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 3.317895497234602e-09, "signal/volume_coverage_5/centered_abs_mean": 1.0367738181860099e-09, "signal/volume_coverage_5/group_std_mean": 1.4120832135533501e-09, "signal/volume_coverage_5/group_zero_std_frac": 0.9722222208976745, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 1.036773857737705e-10, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 1.036773857737705e-10, "step": 140 }, { "calibration/aurc": 0.17861280601130797, "calibration/batch_distribution_entropy": 0.9852219106359359, "calibration/buffer_distribution_entropy": 0.9583941655173911, "calibration/confidence_entropy": 0.5137185398950166, "calibration/coverage@0%": 0.023177774456331256, "calibration/coverage@1%": 0.023177774456331256, "calibration/coverage@10%": 0.32692182191463137, "calibration/coverage@15%": 0.4893533218131497, "calibration/coverage@20%": 0.6518234649790168, "calibration/coverage@25%": 0.7622667218699385, "calibration/coverage@30%": 0.8429098343670999, "calibration/coverage@5%": 0.10116614285484485, "calibration/ece": 0.17461347138091166, "calibration/mean_confidence": 0.4975835279157047, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01206597222222221, "completions/max_length": 3274.2, "completions/max_terminated_length": 3274.2, "completions/mean_length": 787.4185913085937, "completions/mean_terminated_length": 797.0513305664062, "completions/min_length": 0.0, "completions/min_terminated_length": 265.0, "epoch": 0.34799565005437433, "grad_norm": 0.0003231425362173468, "learning_rate": 1.8975903614457832e-06, "loss": -0.0103, "num_tokens": 316168028.0, "reward": 0.9967975854873657, "reward_std": 0.12998930513858795, "rewards/accuracy_reward": 0.7133680582046509, "rewards/brier_reward": 0.7639730095863342, "rewards/confidence_uniqueness_reward": 0.9404920935630798, "rewards/format_reward": 0.9879340171813965, "rewards/frontier_aurc_reward": -0.001185076031833887, "rewards/frontier_ece_reward": 0.0006924874149262905, "rewards/frontier_entropy_batch_reward": -0.24353820085525513, "rewards/volume_coverage_0": -3.5014671162514335e-10, "rewards/volume_coverage_1": -3.5014671162514335e-10, "rewards/volume_coverage_10": -1.1576934522139481e-09, "rewards/volume_coverage_15": -8.61750595604338e-09, "rewards/volume_coverage_20": -1.8163441106722188e-08, "rewards/volume_coverage_25": -5.915410927265219e-06, "rewards/volume_coverage_5": -3.5014671162514335e-10, "signal/accuracy_reward/centered_abs_mean": 0.13902994692325593, "signal/accuracy_reward/group_std_mean": 0.1906949073076248, "signal/accuracy_reward/group_zero_std_frac": 0.43055555820465086, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06951497346162797, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06951497346162797, "signal/advantage_abs_mean": 0.094663205742836, "signal/advantage_pre_scale_abs_mean": 0.094663205742836, "signal/advantage_pre_scale_std": 0.1574291467666626, "signal/advantage_std": 0.1574291467666626, "signal/brier_reward/centered_abs_mean": 0.15350565910339356, "signal/brier_reward/group_std_mean": 0.19510267674922943, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015350566431879997, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.015350566431879997, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03119339495897293, "signal/confidence_uniqueness_reward/group_std_mean": 0.050634662806987765, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003119339654222131, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003119339654222131, "signal/format_reward/centered_abs_mean": 0.02065429650247097, "signal/format_reward/group_std_mean": 0.03805244565010071, "signal/format_reward/group_zero_std_frac": 0.8444444537162781, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.010327148251235485, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.010327148251235485, "signal/frontier_aurc_reward/centered_abs_mean": 0.0013626172440126538, "signal/frontier_aurc_reward/group_std_mean": 0.0022952609695494177, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.7032716095854993e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.7032716095854993e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.03810315653681755, "signal/frontier_ece_reward/group_std_mean": 0.051458243280649185, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00381031590513885, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00381031590513885, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32137046456336976, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3931168556213379, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0321370467543602, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0321370467543602, "signal/volume_coverage_0/centered_abs_mean": 2.1893566043429403e-09, "signal/volume_coverage_0/group_std_mean": 2.8316428202224133e-09, "signal/volume_coverage_0/group_zero_std_frac": 0.9027777791023255, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 2.1893565779751434e-10, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 2.1893565779751434e-10, "signal/volume_coverage_1/centered_abs_mean": 2.1893566043429403e-09, "signal/volume_coverage_1/group_std_mean": 2.8316428202224133e-09, "signal/volume_coverage_1/group_zero_std_frac": 0.9027777791023255, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 2.1893565779751434e-10, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 2.1893565779751434e-10, "signal/volume_coverage_10/centered_abs_mean": 4.7582652901168035e-09, "signal/volume_coverage_10/group_std_mean": 6.146304565302074e-09, "signal/volume_coverage_10/group_zero_std_frac": 0.8388888955116272, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 4.758265607918144e-10, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 4.758265607918144e-10, "signal/volume_coverage_15/centered_abs_mean": 3.597350177741987e-08, "signal/volume_coverage_15/group_std_mean": 4.588340245526012e-08, "signal/volume_coverage_15/group_zero_std_frac": 0.7333333373069764, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 3.597350033135438e-09, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 3.597350033135438e-09, "signal/volume_coverage_20/centered_abs_mean": 7.640569981148815e-08, "signal/volume_coverage_20/group_std_mean": 9.747053044684151e-08, "signal/volume_coverage_20/group_zero_std_frac": 0.6500000059604645, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 7.64056946600533e-09, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 7.64056946600533e-09, "signal/volume_coverage_25/centered_abs_mean": 2.985676190974118e-05, "signal/volume_coverage_25/group_std_mean": 3.802538181005843e-05, "signal/volume_coverage_25/group_zero_std_frac": 0.4666666775941849, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 2.98567600873767e-06, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 2.98567600873767e-06, "signal/volume_coverage_5/centered_abs_mean": 2.1893566043429403e-09, "signal/volume_coverage_5/group_std_mean": 2.8316428202224133e-09, "signal/volume_coverage_5/group_zero_std_frac": 0.9027777791023255, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 2.1893565779751434e-10, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 2.1893565779751434e-10, "step": 145 }, { "calibration/aurc": 0.18029917566613354, "calibration/batch_distribution_entropy": 0.9609287430066871, "calibration/buffer_distribution_entropy": 0.9680553694533005, "calibration/confidence_entropy": 0.5266669694591191, "calibration/coverage@0%": 0.045315954372948296, "calibration/coverage@1%": 0.045315954372948296, "calibration/coverage@10%": 0.42797529882281715, "calibration/coverage@15%": 0.5210627786776989, "calibration/coverage@20%": 0.5992149526139929, "calibration/coverage@25%": 0.6996878254078915, "calibration/coverage@30%": 0.7548685125189165, "calibration/coverage@5%": 0.17942530214136124, "calibration/ece": 0.1919776915795625, "calibration/mean_confidence": 0.5383348858104606, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01128472222222221, "completions/max_length": 3553.8, "completions/max_terminated_length": 3553.8, "completions/mean_length": 858.7107666015625, "completions/mean_terminated_length": 868.437451171875, "completions/min_length": 0.0, "completions/min_terminated_length": 257.2, "epoch": 0.3599955000562493, "grad_norm": 0.00037785363383591175, "learning_rate": 1.7469879518072292e-06, "loss": -0.0076, "num_tokens": 329170712.0, "reward": 0.9839182257652282, "reward_std": 0.13611459136009216, "rewards/accuracy_reward": 0.6901041626930237, "rewards/brier_reward": 0.7852767705917358, "rewards/confidence_uniqueness_reward": 0.9386167883872986, "rewards/format_reward": 0.9887152671813965, "rewards/frontier_aurc_reward": -0.0013611266971565784, "rewards/frontier_ece_reward": 0.00399660924449563, "rewards/frontier_entropy_batch_reward": -0.2828936755657196, "rewards/volume_coverage_0": -8.655792888001557e-10, "rewards/volume_coverage_1": -8.655792888001557e-10, "rewards/volume_coverage_10": -1.4397807245047912e-09, "rewards/volume_coverage_15": -1.377830247117906e-09, "rewards/volume_coverage_20": 1.7711275204335131e-06, "rewards/volume_coverage_25": 0.0002565411617979407, "rewards/volume_coverage_5": -1.2134155374932121e-09, "signal/accuracy_reward/centered_abs_mean": 0.15776909440755843, "signal/accuracy_reward/group_std_mean": 0.20477839410305024, "signal/accuracy_reward/group_zero_std_frac": 0.43611112236976624, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07888454720377922, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07888454720377922, "signal/advantage_abs_mean": 0.10334948301315308, "signal/advantage_pre_scale_abs_mean": 0.10334948301315308, "signal/advantage_pre_scale_std": 0.16254353821277617, "signal/advantage_std": 0.16254353821277617, "signal/brier_reward/centered_abs_mean": 0.14287383258342742, "signal/brier_reward/group_std_mean": 0.18298307061195374, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014287383668124676, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014287383668124676, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.029984532669186593, "signal/confidence_uniqueness_reward/group_std_mean": 0.047179107740521434, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002998453238978982, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002998453238978982, "signal/format_reward/centered_abs_mean": 0.017936197854578496, "signal/format_reward/group_std_mean": 0.032711121067404744, "signal/format_reward/group_zero_std_frac": 0.8694444537162781, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.008968098927289248, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.008968098927289248, "signal/frontier_aurc_reward/centered_abs_mean": 0.0016798900673165918, "signal/frontier_aurc_reward/group_std_mean": 0.002934660855680704, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.0998626496293582e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.0998626496293582e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.03310015164315701, "signal/frontier_ece_reward/group_std_mean": 0.04407154768705368, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.003310015145689249, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.003310015145689249, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.34080212712287905, "signal/frontier_entropy_batch_reward/group_std_mean": 0.41118053793907167, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03408021330833435, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03408021330833435, "signal/volume_coverage_0/centered_abs_mean": 3.149529201329493e-09, "signal/volume_coverage_0/group_std_mean": 4.050445145953319e-09, "signal/volume_coverage_0/group_zero_std_frac": 0.8833333373069763, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 3.149529425455766e-10, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 3.149529425455766e-10, "signal/volume_coverage_1/centered_abs_mean": 3.149529201329493e-09, "signal/volume_coverage_1/group_std_mean": 4.050445145953319e-09, "signal/volume_coverage_1/group_zero_std_frac": 0.8833333373069763, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 3.149529425455766e-10, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 3.149529425455766e-10, "signal/volume_coverage_10/centered_abs_mean": 7.936655319062957e-09, "signal/volume_coverage_10/group_std_mean": 1.0478954781678596e-08, "signal/volume_coverage_10/group_zero_std_frac": 0.7555555641651154, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 7.936655232326784e-10, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 7.936655232326784e-10, "signal/volume_coverage_15/centered_abs_mean": 1.818619645987951e-08, "signal/volume_coverage_15/group_std_mean": 2.396414628957899e-08, "signal/volume_coverage_15/group_zero_std_frac": 0.6500000059604645, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 1.8186196201752658e-09, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 1.8186196201752658e-09, "signal/volume_coverage_20/centered_abs_mean": 4.4338119351783334e-05, "signal/volume_coverage_20/group_std_mean": 5.8473115660206965e-05, "signal/volume_coverage_20/group_zero_std_frac": 0.5638888895511627, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 4.433812225967948e-06, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 4.433812225967948e-06, "signal/volume_coverage_25/centered_abs_mean": 0.0035675803665071726, "signal/volume_coverage_25/group_std_mean": 0.004789471440017223, "signal/volume_coverage_25/group_zero_std_frac": 0.03333333432674408, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.0003567580570233986, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 0.0003567580570233986, "signal/volume_coverage_5/centered_abs_mean": 3.919237409610865e-09, "signal/volume_coverage_5/group_std_mean": 5.091148846947391e-09, "signal/volume_coverage_5/group_zero_std_frac": 0.8777777791023255, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 3.9192375005103754e-10, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 3.9192375005103754e-10, "step": 150 }, { "epoch": 0.3599955000562493, "eval_calibration/aurc": 0.164208079490717, "eval_calibration/batch_distribution_entropy": 0.8908815670220108, "eval_calibration/buffer_distribution_entropy": 0.9725765790521952, "eval_calibration/confidence_entropy": 0.5328010527649214, "eval_calibration/coverage@0%": 0.2471438172043011, "eval_calibration/coverage@1%": 0.2471438172043011, "eval_calibration/coverage@10%": 0.4008736559139785, "eval_calibration/coverage@15%": 0.5078965053763441, "eval_calibration/coverage@20%": 0.7123655913978495, "eval_calibration/coverage@25%": 0.8172043010752689, "eval_calibration/coverage@30%": 0.9375, "eval_calibration/coverage@5%": 0.2471438172043011, "eval_calibration/ece": 0.1731811553540427, "eval_calibration/mean_confidence": 0.6129084461244519, "eval_completions/clipped_ratio": 0.008680555555555544, "eval_completions/max_length": 3040.5, "eval_completions/max_terminated_length": 3040.5, "eval_completions/mean_length": 835.7632242838541, "eval_completions/mean_terminated_length": 843.113779703776, "eval_completions/min_length": 73.33333333333333, "eval_completions/min_terminated_length": 332.3333333333333, "eval_loss": 0.0, "eval_num_tokens": 329170712.0, "eval_reward": 0.9062922497590383, "eval_reward_std": 0.24627330154180527, "eval_rewards/accuracy_reward": 0.683159718910853, "eval_rewards/brier_reward": 0.7905897796154022, "eval_rewards/confidence_uniqueness_reward": 0.8871917724609375, "eval_rewards/format_reward": 0.9904513855775198, "eval_rewards/frontier_aurc_reward": -0.0017877337328779201, "eval_rewards/frontier_ece_reward": 0.005614791589323431, "eval_rewards/frontier_entropy_batch_reward": -0.9904513855775198, "eval_rewards/volume_coverage_0": -2.907263158512252e-10, "eval_rewards/volume_coverage_1": -2.907263158512252e-10, "eval_rewards/volume_coverage_10": -5.503734481009523e-11, "eval_rewards/volume_coverage_15": 2.1709352926446712e-09, "eval_rewards/volume_coverage_20": 1.7207810086479942e-05, "eval_rewards/volume_coverage_25": 0.002128113391033063, "eval_rewards/volume_coverage_5": -2.907263158512252e-10, "eval_runtime": 198.2371, "eval_samples_per_second": 5.044, "eval_signal/accuracy_reward/centered_abs_mean": 0.4220377554496129, "eval_signal/accuracy_reward/group_std_mean": 0.4656520187854767, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.21101887772480646, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.21101887772480646, "eval_signal/advantage_abs_mean": 0.21716739485661188, "eval_signal/advantage_pre_scale_abs_mean": 0.21716739485661188, "eval_signal/advantage_pre_scale_std": 0.2442422236005465, "eval_signal/advantage_std": 0.2442422236005465, "eval_signal/brier_reward/centered_abs_mean": 0.17609463135401407, "eval_signal/brier_reward/group_std_mean": 0.2297411933541298, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017609463073313236, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.017609463073313236, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.05053987664481004, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.0819082868595918, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005053987881789605, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005053987881789605, "eval_signal/format_reward/centered_abs_mean": 0.018391926928112905, "eval_signal/format_reward/group_std_mean": 0.051025692063073315, "eval_signal/format_reward/group_zero_std_frac": 0.722222238779068, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.009195963464056453, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.009195963464056453, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0030478331997680166, "eval_signal/frontier_aurc_reward/group_std_mean": 0.006132548054059346, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.809791511836617e-05, "eval_signal/frontier_aurc_reward/weight": 0.012500000186264515, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.809791511836617e-05, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.03784426177541415, "eval_signal/frontier_ece_reward/group_std_mean": 0.053607478737831116, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0037844261775414148, "eval_signal/frontier_ece_reward/weight": 0.10000000149011612, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0037844261775414148, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.018391926928112905, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.051025692063073315, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.722222238779068, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0018391927393774192, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0018391927393774192, "eval_signal/volume_coverage_0/centered_abs_mean": 2.2895876074698527e-09, "eval_signal/volume_coverage_0/group_std_mean": 3.2587387001810817e-09, "eval_signal/volume_coverage_0/group_zero_std_frac": 0.8888889153798422, "eval_signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 2.2895876947842675e-10, "eval_signal/volume_coverage_0/weight": 0.10000000149011612, "eval_signal/volume_coverage_0/weighted_centered_abs_mean": 2.2895876947842675e-10, "eval_signal/volume_coverage_1/centered_abs_mean": 2.2895876074698527e-09, "eval_signal/volume_coverage_1/group_std_mean": 3.2587387001810817e-09, "eval_signal/volume_coverage_1/group_zero_std_frac": 0.8888889153798422, "eval_signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 2.2895876947842675e-10, "eval_signal/volume_coverage_1/weight": 0.10000000149011612, "eval_signal/volume_coverage_1/weighted_centered_abs_mean": 2.2895876947842675e-10, "eval_signal/volume_coverage_10/centered_abs_mean": 1.1983273628020838e-08, "eval_signal/volume_coverage_10/group_std_mean": 1.7429221228308183e-08, "eval_signal/volume_coverage_10/group_zero_std_frac": 0.7777777910232544, "eval_signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 1.1983274335209775e-09, "eval_signal/volume_coverage_10/weight": 0.10000000149011612, "eval_signal/volume_coverage_10/weighted_centered_abs_mean": 1.1983274335209775e-09, "eval_signal/volume_coverage_15/centered_abs_mean": 5.853292239788033e-08, "eval_signal/volume_coverage_15/group_std_mean": 8.00400753268408e-08, "eval_signal/volume_coverage_15/group_zero_std_frac": 0.5277777860562006, "eval_signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 5.853291987674887e-09, "eval_signal/volume_coverage_15/weight": 0.10000000149011612, "eval_signal/volume_coverage_15/weighted_centered_abs_mean": 5.853291987674887e-09, "eval_signal/volume_coverage_20/centered_abs_mean": 0.0003371686664953207, "eval_signal/volume_coverage_20/group_std_mean": 0.00047656046808697283, "eval_signal/volume_coverage_20/group_zero_std_frac": 0.1944444477558136, "eval_signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 3.371686761965975e-05, "eval_signal/volume_coverage_20/weight": 0.10000000149011612, "eval_signal/volume_coverage_20/weighted_centered_abs_mean": 3.371686761965975e-05, "eval_signal/volume_coverage_25/centered_abs_mean": 0.010561376344412565, "eval_signal/volume_coverage_25/group_std_mean": 0.016184291957567137, "eval_signal/volume_coverage_25/group_zero_std_frac": 0.0, "eval_signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.00105613767906713, "eval_signal/volume_coverage_25/weight": 0.10000000149011612, "eval_signal/volume_coverage_25/weighted_centered_abs_mean": 0.00105613767906713, "eval_signal/volume_coverage_5/centered_abs_mean": 2.2895876074698527e-09, "eval_signal/volume_coverage_5/group_std_mean": 3.2587387001810817e-09, "eval_signal/volume_coverage_5/group_zero_std_frac": 0.8888889153798422, "eval_signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 2.2895876947842675e-10, "eval_signal/volume_coverage_5/weight": 0.10000000149011612, "eval_signal/volume_coverage_5/weighted_centered_abs_mean": 2.2895876947842675e-10, "eval_steps_per_second": 0.03, "step": 150 }, { "calibration/aurc": 0.18438420495247035, "calibration/batch_distribution_entropy": 0.9603641675120886, "calibration/buffer_distribution_entropy": 0.9744252500060735, "calibration/confidence_entropy": 0.5168124227269436, "calibration/coverage@0%": 0.013314838594667516, "calibration/coverage@1%": 0.013314838594667516, "calibration/coverage@10%": 0.21615471907211553, "calibration/coverage@15%": 0.4260975711864175, "calibration/coverage@20%": 0.7192844677137871, "calibration/coverage@25%": 0.8029532068062826, "calibration/coverage@30%": 0.838001745200698, "calibration/coverage@5%": 0.10868268600611168, "calibration/ece": 0.20437914411274316, "calibration/mean_confidence": 0.5928247329203169, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009461805555555558, "completions/max_length": 3466.8, "completions/max_terminated_length": 3466.8, "completions/mean_length": 815.3238647460937, "completions/mean_terminated_length": 823.2176879882812, "completions/min_length": 0.0, "completions/min_terminated_length": 269.6, "epoch": 0.3719953500581243, "grad_norm": 0.0003626790421549231, "learning_rate": 1.5963855421686747e-06, "loss": -0.0074, "num_tokens": 341670955.0, "reward": 1.007612144947052, "reward_std": 0.1327279031276703, "rewards/accuracy_reward": 0.7348090291023255, "rewards/brier_reward": 0.7994078278541565, "rewards/confidence_uniqueness_reward": 0.9393423318862915, "rewards/format_reward": 0.9905381917953491, "rewards/frontier_aurc_reward": -0.0014131779549643396, "rewards/frontier_ece_reward": 0.0025281490292400123, "rewards/frontier_entropy_batch_reward": -0.29814456701278685, "rewards/volume_coverage_0": -8.647244968684742e-11, "rewards/volume_coverage_1": -8.647244968684742e-11, "rewards/volume_coverage_10": -5.2008233206168875e-09, "rewards/volume_coverage_15": -1.23952112457415e-07, "rewards/volume_coverage_20": -0.0001249164422915783, "rewards/volume_coverage_25": 0.00655297446064651, "rewards/volume_coverage_5": -8.647244968684742e-11, "signal/accuracy_reward/centered_abs_mean": 0.15657009482383727, "signal/accuracy_reward/group_std_mean": 0.203624826669693, "signal/accuracy_reward/group_zero_std_frac": 0.4277777850627899, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07828504741191863, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07828504741191863, "signal/advantage_abs_mean": 0.10121837258338928, "signal/advantage_pre_scale_abs_mean": 0.10121837258338928, "signal/advantage_pre_scale_std": 0.16051416099071503, "signal/advantage_std": 0.16051416099071503, "signal/brier_reward/centered_abs_mean": 0.1417425900697708, "signal/brier_reward/group_std_mean": 0.18280084431171417, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014174258708953858, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014174258708953858, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.028596949204802512, "signal/confidence_uniqueness_reward/group_std_mean": 0.04525943174958229, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002859694929793477, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002859694929793477, "signal/format_reward/centered_abs_mean": 0.016194661520421504, "signal/format_reward/group_std_mean": 0.030201531946659088, "signal/format_reward/group_zero_std_frac": 0.8777777910232544, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.008097330760210752, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.008097330760210752, "signal/frontier_aurc_reward/centered_abs_mean": 0.002045383723452687, "signal/frontier_aurc_reward/group_std_mean": 0.0035399315878748895, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.55672955972841e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.55672955972841e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.030501941591501235, "signal/frontier_ece_reward/group_std_mean": 0.040661169588565825, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.003050194028764963, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.003050194028764963, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3381875276565552, "signal/frontier_entropy_batch_reward/group_std_mean": 0.40684131979942323, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03381875231862068, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03381875231862068, "signal/volume_coverage_0/centered_abs_mean": 5.851382822719131e-10, "signal/volume_coverage_0/group_std_mean": 7.571845983544989e-10, "signal/volume_coverage_0/group_zero_std_frac": 0.9944444417953491, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 5.851382954558116e-11, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 5.851382954558116e-11, "signal/volume_coverage_1/centered_abs_mean": 5.851382822719131e-10, "signal/volume_coverage_1/group_std_mean": 7.571845983544989e-10, "signal/volume_coverage_1/group_zero_std_frac": 0.9944444417953491, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 5.851382954558116e-11, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 5.851382954558116e-11, "signal/volume_coverage_10/centered_abs_mean": 1.061550365388797e-08, "signal/volume_coverage_10/group_std_mean": 1.4323466168697507e-08, "signal/volume_coverage_10/group_zero_std_frac": 0.825, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 1.0615504031363798e-09, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 1.0615504031363798e-09, "signal/volume_coverage_15/centered_abs_mean": 2.943361390173038e-07, "signal/volume_coverage_15/group_std_mean": 3.8099229167087855e-07, "signal/volume_coverage_15/group_zero_std_frac": 0.622222226858139, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 2.943361464557981e-08, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 2.943361464557981e-08, "signal/volume_coverage_20/centered_abs_mean": 0.0009150244877673686, "signal/volume_coverage_20/group_std_mean": 0.0012456974247470499, "signal/volume_coverage_20/group_zero_std_frac": 0.11111111417412758, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 9.150245241471567e-05, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 9.150245241471567e-05, "signal/volume_coverage_25/centered_abs_mean": 0.0145043870434165, "signal/volume_coverage_25/group_std_mean": 0.019368264079093932, "signal/volume_coverage_25/group_zero_std_frac": 0.0, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.0014504387276247145, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 0.0014504387276247145, "signal/volume_coverage_5/centered_abs_mean": 5.851382822719131e-10, "signal/volume_coverage_5/group_std_mean": 7.571845983544989e-10, "signal/volume_coverage_5/group_zero_std_frac": 0.9944444417953491, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 5.851382954558116e-11, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 5.851382954558116e-11, "step": 155 }, { "calibration/aurc": 0.1466230797636561, "calibration/batch_distribution_entropy": 0.964288356863465, "calibration/buffer_distribution_entropy": 0.9780273636730223, "calibration/confidence_entropy": 0.5103002408452058, "calibration/coverage@0%": 0.12145725031133525, "calibration/coverage@1%": 0.1261569892147295, "calibration/coverage@10%": 0.47141484462912897, "calibration/coverage@15%": 0.7015038387175796, "calibration/coverage@20%": 0.7837500349973402, "calibration/coverage@25%": 0.823630204104488, "calibration/coverage@30%": 0.8379679144385026, "calibration/coverage@5%": 0.29739208200252454, "calibration/ece": 0.1881009639784636, "calibration/mean_confidence": 0.5813083634195351, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015364583333333326, "completions/max_length": 3313.0, "completions/max_terminated_length": 3313.0, "completions/mean_length": 827.1549682617188, "completions/mean_terminated_length": 840.0288940429688, "completions/min_length": 0.0, "completions/min_terminated_length": 265.4, "epoch": 0.38399520005999926, "grad_norm": 0.0003019192081410438, "learning_rate": 1.4457831325301204e-06, "loss": -0.0119, "num_tokens": 354287076.0, "reward": 0.9753640413284301, "reward_std": 0.13932546079158784, "rewards/accuracy_reward": 0.6730034708976745, "rewards/brier_reward": 0.7690565705299377, "rewards/confidence_uniqueness_reward": 0.9359739542007446, "rewards/format_reward": 0.9844618082046509, "rewards/frontier_aurc_reward": -0.0019856867846101524, "rewards/frontier_ece_reward": 0.0022318214061670004, "rewards/frontier_entropy_batch_reward": -0.25475322306156156, "rewards/volume_coverage_0": -6.172270767407229e-11, "rewards/volume_coverage_1": -6.172270767407229e-11, "rewards/volume_coverage_10": -2.8393437445650704e-08, "rewards/volume_coverage_15": -4.993317070045577e-07, "rewards/volume_coverage_20": 0.0004009470983874053, "rewards/volume_coverage_25": 0.013652586936950683, "rewards/volume_coverage_5": 1.4168433531795699e-08, "signal/accuracy_reward/centered_abs_mean": 0.1557996988296509, "signal/accuracy_reward/group_std_mean": 0.20581969022750854, "signal/accuracy_reward/group_zero_std_frac": 0.4083333373069763, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07789984941482545, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07789984941482545, "signal/advantage_abs_mean": 0.10422182083129883, "signal/advantage_pre_scale_abs_mean": 0.10422182083129883, "signal/advantage_pre_scale_std": 0.16794657409191133, "signal/advantage_std": 0.16794657409191133, "signal/brier_reward/centered_abs_mean": 0.15780293941497803, "signal/brier_reward/group_std_mean": 0.20066075921058654, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015780294500291347, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.015780294500291347, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03549051396548748, "signal/confidence_uniqueness_reward/group_std_mean": 0.05435318723320961, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003549051284790039, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003549051284790039, "signal/format_reward/centered_abs_mean": 0.0249620221555233, "signal/format_reward/group_std_mean": 0.04160917028784752, "signal/format_reward/group_zero_std_frac": 0.844444465637207, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01248101107776165, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01248101107776165, "signal/frontier_aurc_reward/centered_abs_mean": 0.002445269119925797, "signal/frontier_aurc_reward/group_std_mean": 0.0041489469353109595, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.056586501770653e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.056586501770653e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.03229519799351692, "signal/frontier_ece_reward/group_std_mean": 0.042399514466524124, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0032295198645442722, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0032295198645442722, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3162634313106537, "signal/frontier_entropy_batch_reward/group_std_mean": 0.38911319971084596, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03162634521722794, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03162634521722794, "signal/volume_coverage_0/centered_abs_mean": 5.618044016109103e-10, "signal/volume_coverage_0/group_std_mean": 7.310222538414734e-10, "signal/volume_coverage_0/group_zero_std_frac": 0.9972222208976745, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 5.618044328359329e-11, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 5.618044328359329e-11, "signal/volume_coverage_1/centered_abs_mean": 5.618044016109103e-10, "signal/volume_coverage_1/group_std_mean": 7.310222538414734e-10, "signal/volume_coverage_1/group_zero_std_frac": 0.9972222208976745, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 5.618044328359329e-11, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 5.618044328359329e-11, "signal/volume_coverage_10/centered_abs_mean": 3.5007122471952813e-07, "signal/volume_coverage_10/group_std_mean": 4.474250966168825e-07, "signal/volume_coverage_10/group_zero_std_frac": 0.775, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 3.5007123339036994e-08, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 3.5007123339036994e-08, "signal/volume_coverage_15/centered_abs_mean": 7.844799370104738e-06, "signal/volume_coverage_15/group_std_mean": 1.0182519690715709e-05, "signal/volume_coverage_15/group_zero_std_frac": 0.3694444507360458, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 7.844799313261319e-07, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 7.844799313261319e-07, "signal/volume_coverage_20/centered_abs_mean": 0.0039816807955503465, "signal/volume_coverage_20/group_std_mean": 0.005318196397274733, "signal/volume_coverage_20/group_zero_std_frac": 0.0, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 0.0003981680842116475, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 0.0003981680842116475, "signal/volume_coverage_25/centered_abs_mean": 0.023230988532304764, "signal/volume_coverage_25/group_std_mean": 0.030292440578341483, "signal/volume_coverage_25/group_zero_std_frac": 0.0, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.002323098946362734, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 0.002323098946362734, "signal/volume_coverage_5/centered_abs_mean": 1.1731535911468916e-07, "signal/volume_coverage_5/group_std_mean": 1.4786015383183583e-07, "signal/volume_coverage_5/group_zero_std_frac": 0.9305555582046509, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 1.1731536444029022e-08, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 1.1731536444029022e-08, "step": 160 }, { "calibration/aurc": 0.1546312380703148, "calibration/batch_distribution_entropy": 0.9767267361123911, "calibration/buffer_distribution_entropy": 0.9821229946566788, "calibration/confidence_entropy": 0.48704984961187714, "calibration/coverage@0%": 0.025887868601684395, "calibration/coverage@1%": 0.025887868601684395, "calibration/coverage@10%": 0.5159800660623028, "calibration/coverage@15%": 0.5914674495924496, "calibration/coverage@20%": 0.6686515086515087, "calibration/coverage@25%": 0.7135845778505059, "calibration/coverage@30%": 0.9001099239326387, "calibration/coverage@5%": 0.26043661377542954, "calibration/ece": 0.2139093327163634, "calibration/mean_confidence": 0.5058844434572112, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017447916666666653, "completions/max_length": 3554.0, "completions/max_terminated_length": 3554.0, "completions/mean_length": 868.8214477539062, "completions/mean_terminated_length": 884.3752075195313, "completions/min_length": 0.0, "completions/min_terminated_length": 287.0, "epoch": 0.39599505006187424, "grad_norm": 0.0003124874783679843, "learning_rate": 1.2951807228915664e-06, "loss": -0.0099, "num_tokens": 367434971.0, "reward": 0.9755571007728576, "reward_std": 0.1345706507563591, "rewards/accuracy_reward": 0.6695312619209289, "rewards/brier_reward": 0.7570839524269104, "rewards/confidence_uniqueness_reward": 0.9359697103500366, "rewards/format_reward": 0.9825520753860474, "rewards/frontier_aurc_reward": -0.0016420065890997647, "rewards/frontier_ece_reward": 0.0011796611128374935, "rewards/frontier_entropy_batch_reward": -0.2225494861602783, "rewards/volume_coverage_0": -2.2794013121441027e-09, "rewards/volume_coverage_1": -2.2794013121441027e-09, "rewards/volume_coverage_10": -5.1920217991607843e-08, "rewards/volume_coverage_15": 1.054141050360613e-06, "rewards/volume_coverage_20": 0.0007828456378774718, "rewards/volume_coverage_25": 0.022891780361533165, "rewards/volume_coverage_5": -5.379716716547023e-09, "signal/accuracy_reward/centered_abs_mean": 0.14666341245174408, "signal/accuracy_reward/group_std_mean": 0.19849115908145903, "signal/accuracy_reward/group_zero_std_frac": 0.4055555522441864, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07333170622587204, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07333170622587204, "signal/advantage_abs_mean": 0.09981575608253479, "signal/advantage_pre_scale_abs_mean": 0.09981575608253479, "signal/advantage_pre_scale_std": 0.16138841807842255, "signal/advantage_std": 0.16138841807842255, "signal/brier_reward/centered_abs_mean": 0.166391322016716, "signal/brier_reward/group_std_mean": 0.21182333827018737, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01663913168013096, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01663913168013096, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0341212198138237, "signal/confidence_uniqueness_reward/group_std_mean": 0.05267147943377495, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0034121218603104355, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0034121218603104355, "signal/format_reward/centered_abs_mean": 0.02448459193110466, "signal/format_reward/group_std_mean": 0.041171152144670486, "signal/format_reward/group_zero_std_frac": 0.8444444537162781, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01224229596555233, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01224229596555233, "signal/frontier_aurc_reward/centered_abs_mean": 0.0021314293844625354, "signal/frontier_aurc_reward/group_std_mean": 0.0037278625182807445, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.6642868033377453e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.6642868033377453e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.03538916334509849, "signal/frontier_ece_reward/group_std_mean": 0.045856249332427976, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.003538916353136301, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.003538916353136301, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.29995506405830386, "signal/frontier_entropy_batch_reward/group_std_mean": 0.37324848771095276, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.029995508119463922, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.029995508119463922, "signal/volume_coverage_0/centered_abs_mean": 8.71896759235824e-09, "signal/volume_coverage_0/group_std_mean": 1.1290637758065714e-08, "signal/volume_coverage_0/group_zero_std_frac": 0.7361111164093017, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 8.718967769993924e-10, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 8.718967769993924e-10, "signal/volume_coverage_1/centered_abs_mean": 8.71896759235824e-09, "signal/volume_coverage_1/group_std_mean": 1.1290637758065714e-08, "signal/volume_coverage_1/group_zero_std_frac": 0.7361111164093017, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 8.718967769993924e-10, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 8.718967769993924e-10, "signal/volume_coverage_10/centered_abs_mean": 5.929277968164115e-07, "signal/volume_coverage_10/group_std_mean": 7.720077775275058e-07, "signal/volume_coverage_10/group_zero_std_frac": 0.3805555671453476, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 5.929278206195931e-08, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 5.929278206195931e-08, "signal/volume_coverage_15/centered_abs_mean": 1.5034493299026507e-05, "signal/volume_coverage_15/group_std_mean": 1.9472881831461564e-05, "signal/volume_coverage_15/group_zero_std_frac": 0.21388889122754334, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 1.5034493799248593e-06, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 1.5034493799248593e-06, "signal/volume_coverage_20/centered_abs_mean": 0.007102501392364502, "signal/volume_coverage_20/group_std_mean": 0.009295590687543154, "signal/volume_coverage_20/group_zero_std_frac": 0.0, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 0.0007102501345798373, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 0.0007102501345798373, "signal/volume_coverage_25/centered_abs_mean": 0.03103804439306259, "signal/volume_coverage_25/group_std_mean": 0.03991614505648613, "signal/volume_coverage_25/group_zero_std_frac": 0.0, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.003103804448619485, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 0.003103804448619485, "signal/volume_coverage_5/centered_abs_mean": 2.153096625434614e-08, "signal/volume_coverage_5/group_std_mean": 2.8507457550119853e-08, "signal/volume_coverage_5/group_zero_std_frac": 0.6916666746139526, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 2.15309672313424e-09, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 2.15309672313424e-09, "step": 165 }, { "calibration/aurc": 0.17371525972343724, "calibration/batch_distribution_entropy": 0.9461858305303827, "calibration/buffer_distribution_entropy": 0.9854462094498718, "calibration/confidence_entropy": 0.4846643375245122, "calibration/coverage@0%": 0.010455004351610096, "calibration/coverage@1%": 0.010455004351610096, "calibration/coverage@10%": 0.35906587241032534, "calibration/coverage@15%": 0.4774973328013402, "calibration/coverage@20%": 0.6729397947160021, "calibration/coverage@25%": 0.7635555132449945, "calibration/coverage@30%": 0.8675021758050478, "calibration/coverage@5%": 0.09661688424717145, "calibration/ece": 0.1664582589769637, "calibration/mean_confidence": 0.6071375677769686, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011371527777777769, "completions/max_length": 3648.4, "completions/max_terminated_length": 3648.4, "completions/mean_length": 863.1968872070313, "completions/mean_terminated_length": 873.1014892578125, "completions/min_length": 0.0, "completions/min_terminated_length": 278.6, "epoch": 0.4079949000637492, "grad_norm": 0.0003431806981097907, "learning_rate": 1.1445783132530121e-06, "loss": -0.0085, "num_tokens": 380468183.0, "reward": 1.0040875792503356, "reward_std": 0.1347779542207718, "rewards/accuracy_reward": 0.7197048664093018, "rewards/brier_reward": 0.7805811762809753, "rewards/confidence_uniqueness_reward": 0.9393272161483764, "rewards/format_reward": 0.9886284828186035, "rewards/frontier_aurc_reward": -0.0017156409798189999, "rewards/frontier_ece_reward": -0.0007315105176530778, "rewards/frontier_entropy_batch_reward": -0.2580687701702118, "rewards/volume_coverage_0": -1.211497779873838e-09, "rewards/volume_coverage_1": -1.211497779873838e-09, "rewards/volume_coverage_10": -8.929526549295019e-08, "rewards/volume_coverage_15": -1.3116566924509243e-05, "rewards/volume_coverage_20": 0.0010870593221625313, "rewards/volume_coverage_25": 0.037241144105792046, "rewards/volume_coverage_5": -1.2136919802507862e-09, "signal/accuracy_reward/centered_abs_mean": 0.14935438632965087, "signal/accuracy_reward/group_std_mean": 0.20225562751293183, "signal/accuracy_reward/group_zero_std_frac": 0.4055555582046509, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07467719316482543, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07467719316482543, "signal/advantage_abs_mean": 0.09886526316404343, "signal/advantage_pre_scale_abs_mean": 0.09886526316404343, "signal/advantage_pre_scale_std": 0.16056418120861055, "signal/advantage_std": 0.16056418120861055, "signal/brier_reward/centered_abs_mean": 0.1546865701675415, "signal/brier_reward/group_std_mean": 0.19657468795776367, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015468657575547695, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.015468657575547695, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03084472641348839, "signal/confidence_uniqueness_reward/group_std_mean": 0.05063636749982834, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0030844727531075477, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0030844727531075477, "signal/format_reward/centered_abs_mean": 0.019156900979578496, "signal/format_reward/group_std_mean": 0.03671438507735729, "signal/format_reward/group_zero_std_frac": 0.8472222328186035, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.009578450489789248, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.009578450489789248, "signal/frontier_aurc_reward/centered_abs_mean": 0.002360010566189885, "signal/frontier_aurc_reward/group_std_mean": 0.004065482737496495, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.9500132950488477e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.9500132950488477e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.031055760383605958, "signal/frontier_ece_reward/group_std_mean": 0.04056341871619225, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.003105575917288661, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.003105575917288661, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32181860208511354, "signal/frontier_entropy_batch_reward/group_std_mean": 0.391850072145462, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03218186013400555, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03218186013400555, "signal/volume_coverage_0/centered_abs_mean": 9.236164411063897e-09, "signal/volume_coverage_0/group_std_mean": 1.196727463792513e-08, "signal/volume_coverage_0/group_zero_std_frac": 0.7083333373069763, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 9.236164610904041e-10, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 9.236164610904041e-10, "signal/volume_coverage_1/centered_abs_mean": 9.236164411063897e-09, "signal/volume_coverage_1/group_std_mean": 1.196727463792513e-08, "signal/volume_coverage_1/group_zero_std_frac": 0.7083333373069763, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 9.236164610904041e-10, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 9.236164610904041e-10, "signal/volume_coverage_10/centered_abs_mean": 5.15992598248971e-07, "signal/volume_coverage_10/group_std_mean": 6.642203786100253e-07, "signal/volume_coverage_10/group_zero_std_frac": 0.4499999970197678, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 5.159925713371649e-08, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 5.159925713371649e-08, "signal/volume_coverage_15/centered_abs_mean": 5.2554698049789296e-05, "signal/volume_coverage_15/group_std_mean": 7.018936448730529e-05, "signal/volume_coverage_15/group_zero_std_frac": 0.16666666865348817, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 5.25546981862135e-06, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 5.25546981862135e-06, "signal/volume_coverage_20/centered_abs_mean": 0.008253774605691433, "signal/volume_coverage_20/group_std_mean": 0.010948755592107774, "signal/volume_coverage_20/group_zero_std_frac": 0.0, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 0.0008253774838522076, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 0.0008253774838522076, "signal/volume_coverage_25/centered_abs_mean": 0.03802705928683281, "signal/volume_coverage_25/group_std_mean": 0.04884639978408813, "signal/volume_coverage_25/group_zero_std_frac": 0.0, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.003802705928683281, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 0.003802705928683281, "signal/volume_coverage_5/centered_abs_mean": 1.2833939466716515e-08, "signal/volume_coverage_5/group_std_mean": 1.6756578347099094e-08, "signal/volume_coverage_5/group_zero_std_frac": 0.6888888955116272, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 1.2833940021828028e-09, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 1.2833940021828028e-09, "step": 170 }, { "calibration/aurc": 0.140991436437331, "calibration/batch_distribution_entropy": 0.9774483483818164, "calibration/buffer_distribution_entropy": 0.9877175559458158, "calibration/confidence_entropy": 0.5059720102009231, "calibration/coverage@0%": 0.045580104973068296, "calibration/coverage@1%": 0.10364462110210056, "calibration/coverage@10%": 0.3414710206091299, "calibration/coverage@15%": 0.6232919464639679, "calibration/coverage@20%": 0.8037394190618983, "calibration/coverage@25%": 0.8891310954551734, "calibration/coverage@30%": 0.9374775521480867, "calibration/coverage@5%": 0.15040337345337373, "calibration/ece": 0.19020302118642468, "calibration/mean_confidence": 0.5479368743882117, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014322916666666652, "completions/max_length": 3943.0, "completions/max_terminated_length": 3943.0, "completions/mean_length": 893.93671875, "completions/mean_terminated_length": 906.9239624023437, "completions/min_length": 0.0, "completions/min_terminated_length": 275.0, "epoch": 0.4199947500656242, "grad_norm": 0.0003500058373901993, "learning_rate": 9.93975903614458e-07, "loss": -0.0124, "num_tokens": 393874302.0, "reward": 0.9988266825675964, "reward_std": 0.139481620490551, "rewards/accuracy_reward": 0.71171875, "rewards/brier_reward": 0.7771403908729553, "rewards/confidence_uniqueness_reward": 0.9368108153343201, "rewards/format_reward": 0.9856770873069763, "rewards/frontier_aurc_reward": -0.0016718719620257617, "rewards/frontier_ece_reward": -0.0001888960599899292, "rewards/frontier_entropy_batch_reward": -0.26143977642059324, "rewards/volume_coverage_0": -1.1183640669792938e-09, "rewards/volume_coverage_1": -1.1183640669792938e-09, "rewards/volume_coverage_10": -2.567473664782938e-08, "rewards/volume_coverage_15": -6.893287354614585e-06, "rewards/volume_coverage_20": 0.0032489079516381026, "rewards/volume_coverage_25": 0.04593147337436676, "rewards/volume_coverage_5": -1.3001145759972133e-08, "signal/accuracy_reward/centered_abs_mean": 0.1520887643098831, "signal/accuracy_reward/group_std_mean": 0.20999579429626464, "signal/accuracy_reward/group_zero_std_frac": 0.38055555820465087, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07604438215494155, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07604438215494155, "signal/advantage_abs_mean": 0.10078130513429642, "signal/advantage_pre_scale_abs_mean": 0.10078130513429642, "signal/advantage_pre_scale_std": 0.167160502076149, "signal/advantage_std": 0.167160502076149, "signal/brier_reward/centered_abs_mean": 0.1584286332130432, "signal/brier_reward/group_std_mean": 0.20207290053367616, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015842863731086253, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.015842863731086253, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03476654589176178, "signal/confidence_uniqueness_reward/group_std_mean": 0.056284508854150775, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0034766546450555325, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0034766546450555325, "signal/format_reward/centered_abs_mean": 0.02369249127805233, "signal/format_reward/group_std_mean": 0.04311860054731369, "signal/format_reward/group_zero_std_frac": 0.825000011920929, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.011846245639026165, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.011846245639026165, "signal/frontier_aurc_reward/centered_abs_mean": 0.0022591098211705686, "signal/frontier_aurc_reward/group_std_mean": 0.003889821656048298, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.8238874438102358e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.8238874438102358e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.03167073018848896, "signal/frontier_ece_reward/group_std_mean": 0.041566482931375506, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0031670730095356703, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0031670730095356703, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3187211215496063, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3893312394618988, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03187211267650127, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03187211267650127, "signal/volume_coverage_0/centered_abs_mean": 6.795347695653931e-09, "signal/volume_coverage_0/group_std_mean": 8.810956009241978e-09, "signal/volume_coverage_0/group_zero_std_frac": 0.8166666626930237, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 6.795348173049831e-10, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 6.795348173049831e-10, "signal/volume_coverage_1/centered_abs_mean": 6.795347695653931e-09, "signal/volume_coverage_1/group_std_mean": 8.810956009241978e-09, "signal/volume_coverage_1/group_zero_std_frac": 0.8166666626930237, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 6.795348173049831e-10, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 6.795348173049831e-10, "signal/volume_coverage_10/centered_abs_mean": 3.022442399469583e-07, "signal/volume_coverage_10/group_std_mean": 4.0371518821302744e-07, "signal/volume_coverage_10/group_zero_std_frac": 0.48611110746860503, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 3.022442296440886e-08, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 3.022442296440886e-08, "signal/volume_coverage_15/centered_abs_mean": 6.898900683154352e-05, "signal/volume_coverage_15/group_std_mean": 9.025059152918402e-05, "signal/volume_coverage_15/group_zero_std_frac": 0.22777778208255767, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 6.898901301610749e-06, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 6.898901301610749e-06, "signal/volume_coverage_20/centered_abs_mean": 0.011228302493691444, "signal/volume_coverage_20/group_std_mean": 0.014794117771089077, "signal/volume_coverage_20/group_zero_std_frac": 0.0, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 0.0011228302493691445, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 0.0011228302493691445, "signal/volume_coverage_25/centered_abs_mean": 0.04425051659345627, "signal/volume_coverage_25/group_std_mean": 0.05683296546339989, "signal/volume_coverage_25/group_zero_std_frac": 0.0, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.004425051528960467, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 0.004425051528960467, "signal/volume_coverage_5/centered_abs_mean": 5.0608609925006934e-08, "signal/volume_coverage_5/group_std_mean": 6.722306373774245e-08, "signal/volume_coverage_5/group_zero_std_frac": 0.6777777791023254, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 5.0608611079638875e-09, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 5.0608611079638875e-09, "step": 175 }, { "calibration/aurc": 0.09735049164862375, "calibration/batch_distribution_entropy": 0.9718827549906518, "calibration/buffer_distribution_entropy": 0.9891104431593053, "calibration/confidence_entropy": 0.5013433986553448, "calibration/coverage@0%": 0.0641919163548773, "calibration/coverage@1%": 0.08208665319698255, "calibration/coverage@10%": 0.6338338045297767, "calibration/coverage@15%": 0.7994965859597166, "calibration/coverage@20%": 0.8994158533623254, "calibration/coverage@25%": 0.9608651226158038, "calibration/coverage@30%": 0.9796875, "calibration/coverage@5%": 0.3505137032120355, "calibration/ece": 0.18134802593018456, "calibration/mean_confidence": 0.5809839208489577, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016666666666666673, "completions/max_length": 3667.4, "completions/max_terminated_length": 3667.4, "completions/mean_length": 848.6334350585937, "completions/mean_terminated_length": 862.9874877929688, "completions/min_length": 0.0, "completions/min_terminated_length": 287.2, "epoch": 0.4319946000674992, "grad_norm": 0.0003335257642902434, "learning_rate": 8.433734939759036e-07, "loss": -0.0121, "num_tokens": 406750527.0, "reward": 0.9957963824272156, "reward_std": 0.14223625361919404, "rewards/accuracy_reward": 0.7073784708976746, "rewards/brier_reward": 0.7794308066368103, "rewards/confidence_uniqueness_reward": 0.9341415762901306, "rewards/format_reward": 0.9833333373069764, "rewards/frontier_aurc_reward": -0.0021487005054950715, "rewards/frontier_ece_reward": 0.000431177020072937, "rewards/frontier_entropy_batch_reward": -0.26810349225997926, "rewards/volume_coverage_0": -2.0573607123486327e-10, "rewards/volume_coverage_1": -2.0573607123486327e-10, "rewards/volume_coverage_10": -1.6212315223640416e-08, "rewards/volume_coverage_15": 9.642715667723678e-07, "rewards/volume_coverage_20": 0.005197459273040295, "rewards/volume_coverage_25": 0.05357494503259659, "rewards/volume_coverage_5": -5.968378763432369e-10, "signal/accuracy_reward/centered_abs_mean": 0.1573187917470932, "signal/accuracy_reward/group_std_mean": 0.2063736468553543, "signal/accuracy_reward/group_zero_std_frac": 0.41666666865348817, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0786593958735466, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0786593958735466, "signal/advantage_abs_mean": 0.10662449449300766, "signal/advantage_pre_scale_abs_mean": 0.10662449449300766, "signal/advantage_pre_scale_std": 0.17244213521480561, "signal/advantage_std": 0.17244213521480561, "signal/brier_reward/centered_abs_mean": 0.15907377898693084, "signal/brier_reward/group_std_mean": 0.2006416529417038, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015907378122210502, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.015907378122210502, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03644292093813419, "signal/confidence_uniqueness_reward/group_std_mean": 0.05790592879056931, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0036442920099943876, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0036442920099943876, "signal/format_reward/centered_abs_mean": 0.02569444477558136, "signal/format_reward/group_std_mean": 0.04491157345473766, "signal/format_reward/group_zero_std_frac": 0.825000011920929, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01284722238779068, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01284722238779068, "signal/frontier_aurc_reward/centered_abs_mean": 0.0028547207824885846, "signal/frontier_aurc_reward/group_std_mean": 0.00510807204991579, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.568401371012442e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.568401371012442e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.031456112116575244, "signal/frontier_ece_reward/group_std_mean": 0.0408004954457283, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0031456112395972014, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0031456112395972014, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32392174005508423, "signal/frontier_entropy_batch_reward/group_std_mean": 0.39390974044799804, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03239217437803745, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03239217437803745, "signal/volume_coverage_0/centered_abs_mean": 6.894463333573242e-09, "signal/volume_coverage_0/group_std_mean": 8.965259878923603e-09, "signal/volume_coverage_0/group_zero_std_frac": 0.8277777791023254, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 6.894463999707056e-10, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 6.894463999707056e-10, "signal/volume_coverage_1/centered_abs_mean": 6.894463333573242e-09, "signal/volume_coverage_1/group_std_mean": 8.965259878923603e-09, "signal/volume_coverage_1/group_zero_std_frac": 0.8277777791023254, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 6.894463999707056e-10, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 6.894463999707056e-10, "signal/volume_coverage_10/centered_abs_mean": 2.0830769500435054e-07, "signal/volume_coverage_10/group_std_mean": 2.7283264500965745e-07, "signal/volume_coverage_10/group_zero_std_frac": 0.4138888895511627, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 2.0830770885993388e-08, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 2.0830770885993388e-08, "signal/volume_coverage_15/centered_abs_mean": 9.59263401455246e-05, "signal/volume_coverage_15/group_std_mean": 0.0001241161226062104, "signal/volume_coverage_15/group_zero_std_frac": 0.1916666731238365, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 9.592634523869491e-06, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 9.592634523869491e-06, "signal/volume_coverage_20/centered_abs_mean": 0.013449459336698055, "signal/volume_coverage_20/group_std_mean": 0.01758615728467703, "signal/volume_coverage_20/group_zero_std_frac": 0.0, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 0.0013449459336698055, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 0.0013449459336698055, "signal/volume_coverage_25/centered_abs_mean": 0.05379967465996742, "signal/volume_coverage_25/group_std_mean": 0.06872403174638748, "signal/volume_coverage_25/group_zero_std_frac": 0.0, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.005379967298358679, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 0.005379967298358679, "signal/volume_coverage_5/centered_abs_mean": 1.9899143155477362e-08, "signal/volume_coverage_5/group_std_mean": 2.5720825524988555e-08, "signal/volume_coverage_5/group_zero_std_frac": 0.7027777791023254, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 1.989914355515765e-09, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 1.989914355515765e-09, "step": 180 }, { "calibration/aurc": 0.19188778559925812, "calibration/batch_distribution_entropy": 0.9692985646044558, "calibration/buffer_distribution_entropy": 0.9885363470800119, "calibration/confidence_entropy": 0.49170521564602654, "calibration/coverage@0%": 0.007841128807246222, "calibration/coverage@1%": 0.007841128807246222, "calibration/coverage@10%": 0.13206599825894333, "calibration/coverage@15%": 0.3171304564834864, "calibration/coverage@20%": 0.6903923725390136, "calibration/coverage@25%": 0.8874851338974479, "calibration/coverage@30%": 0.9246073298429319, "calibration/coverage@5%": 0.05588290426416528, "calibration/ece": 0.22218107648375957, "calibration/mean_confidence": 0.5590917161464007, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013194444444444463, "completions/max_length": 3478.2, "completions/max_terminated_length": 3478.2, "completions/mean_length": 835.9971435546875, "completions/mean_terminated_length": 847.218603515625, "completions/min_length": 0.0, "completions/min_terminated_length": 253.8, "epoch": 0.44399445006937416, "grad_norm": 0.0003270205343142152, "learning_rate": 6.927710843373495e-07, "loss": -0.008, "num_tokens": 419471262.0, "reward": 0.9919918060302735, "reward_std": 0.14110299646854402, "rewards/accuracy_reward": 0.68828125, "rewards/brier_reward": 0.776080322265625, "rewards/confidence_uniqueness_reward": 0.9390787482261658, "rewards/format_reward": 0.98671875, "rewards/frontier_aurc_reward": -0.0019206261495128274, "rewards/frontier_ece_reward": 0.0002056588651612401, "rewards/frontier_entropy_batch_reward": -0.2390881210565567, "rewards/volume_coverage_0": -9.607167283931516e-11, "rewards/volume_coverage_1": -9.607167283931516e-11, "rewards/volume_coverage_10": 6.878886154026986e-08, "rewards/volume_coverage_15": 8.149280984071083e-06, "rewards/volume_coverage_20": 0.008166126534342765, "rewards/volume_coverage_25": 0.060707013309001925, "rewards/volume_coverage_5": -2.027620027433841e-09, "signal/accuracy_reward/centered_abs_mean": 0.16101888120174407, "signal/accuracy_reward/group_std_mean": 0.21531691253185273, "signal/accuracy_reward/group_zero_std_frac": 0.37500000596046446, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08050944060087203, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08050944060087203, "signal/advantage_abs_mean": 0.10527712404727936, "signal/advantage_pre_scale_abs_mean": 0.10527712404727936, "signal/advantage_pre_scale_std": 0.16531254947185517, "signal/advantage_std": 0.16531254947185517, "signal/brier_reward/centered_abs_mean": 0.1594757229089737, "signal/brier_reward/group_std_mean": 0.202311235666275, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015947572141885757, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.015947572141885757, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03095320761203766, "signal/confidence_uniqueness_reward/group_std_mean": 0.05040250569581985, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0030953207984566688, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0030953207984566688, "signal/format_reward/centered_abs_mean": 0.020122612453997137, "signal/format_reward/group_std_mean": 0.03736539520323277, "signal/format_reward/group_zero_std_frac": 0.8444444537162781, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.010061306226998568, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.010061306226998568, "signal/frontier_aurc_reward/centered_abs_mean": 0.0025723907630890606, "signal/frontier_aurc_reward/group_std_mean": 0.004795023193582893, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.215488541172817e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.215488541172817e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.032742565497756004, "signal/frontier_ece_reward/group_std_mean": 0.04218977615237236, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0032742566429078577, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0032742566429078577, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3048552870750427, "signal/frontier_entropy_batch_reward/group_std_mean": 0.37814642786979674, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.030485530197620393, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.030485530197620393, "signal/volume_coverage_0/centered_abs_mean": 6.561989396303147e-09, "signal/volume_coverage_0/group_std_mean": 8.45127092752307e-09, "signal/volume_coverage_0/group_zero_std_frac": 0.7694444537162781, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 6.561989479569874e-10, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 6.561989479569874e-10, "signal/volume_coverage_1/centered_abs_mean": 6.561989396303147e-09, "signal/volume_coverage_1/group_std_mean": 8.45127092752307e-09, "signal/volume_coverage_1/group_zero_std_frac": 0.7694444537162781, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 6.561989479569874e-10, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 6.561989479569874e-10, "signal/volume_coverage_10/centered_abs_mean": 3.432544644965674e-07, "signal/volume_coverage_10/group_std_mean": 4.456041097000707e-07, "signal/volume_coverage_10/group_zero_std_frac": 0.28333333432674407, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 3.432544453119135e-08, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 3.432544453119135e-08, "signal/volume_coverage_15/centered_abs_mean": 0.0001473607844673097, "signal/volume_coverage_15/group_std_mean": 0.00019077141769230366, "signal/volume_coverage_15/group_zero_std_frac": 0.2194444477558136, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 1.4736078446730971e-05, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 1.4736078446730971e-05, "signal/volume_coverage_20/centered_abs_mean": 0.016519613564014435, "signal/volume_coverage_20/group_std_mean": 0.021331942826509475, "signal/volume_coverage_20/group_zero_std_frac": 0.0, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 0.0016519613796845078, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 0.0016519613796845078, "signal/volume_coverage_25/centered_abs_mean": 0.05676387697458267, "signal/volume_coverage_25/group_std_mean": 0.07344103008508682, "signal/volume_coverage_25/group_zero_std_frac": 0.0, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.005676387995481491, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 0.005676387995481491, "signal/volume_coverage_5/centered_abs_mean": 1.644520555288409e-08, "signal/volume_coverage_5/group_std_mean": 2.145805164666115e-08, "signal/volume_coverage_5/group_zero_std_frac": 0.6805555582046509, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 1.6445205613946356e-09, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 1.6445205613946356e-09, "step": 185 }, { "calibration/aurc": 0.16563327498860908, "calibration/batch_distribution_entropy": 0.9669831065238427, "calibration/buffer_distribution_entropy": 0.9883207647652302, "calibration/confidence_entropy": 0.49194281575766324, "calibration/coverage@0%": 0.023620188208345654, "calibration/coverage@1%": 0.023620188208345654, "calibration/coverage@10%": 0.3429785884142111, "calibration/coverage@15%": 0.4899246732300961, "calibration/coverage@20%": 0.6019432235082157, "calibration/coverage@25%": 0.9215743919738493, "calibration/coverage@30%": 0.9895259186351705, "calibration/coverage@5%": 0.17279849807400702, "calibration/ece": 0.20003581041724522, "calibration/mean_confidence": 0.5874865962793766, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00894097222222221, "completions/max_length": 3464.2, "completions/max_terminated_length": 3464.2, "completions/mean_length": 830.3328247070312, "completions/mean_terminated_length": 837.8548095703125, "completions/min_length": 0.0, "completions/min_terminated_length": 262.8, "epoch": 0.45599430007124914, "grad_norm": 0.00036124690086580813, "learning_rate": 5.421686746987952e-07, "loss": -0.0063, "num_tokens": 432119640.0, "reward": 1.01535884141922, "reward_std": 0.13885502517223358, "rewards/accuracy_reward": 0.7321180582046509, "rewards/brier_reward": 0.7801229596138001, "rewards/confidence_uniqueness_reward": 0.9418362855911255, "rewards/format_reward": 0.9910590291023255, "rewards/frontier_aurc_reward": -0.0021081025479361415, "rewards/frontier_ece_reward": -0.0038828586577437816, "rewards/frontier_entropy_batch_reward": -0.26736214458942414, "rewards/volume_coverage_0": -1.141676597010699e-09, "rewards/volume_coverage_1": -1.141676597010699e-09, "rewards/volume_coverage_10": -1.2779696589859668e-08, "rewards/volume_coverage_15": -9.471884404774756e-05, "rewards/volume_coverage_20": 0.010956103447824717, "rewards/volume_coverage_25": 0.07639060616493225, "rewards/volume_coverage_5": -8.485374047850769e-09, "signal/accuracy_reward/centered_abs_mean": 0.1593641459941864, "signal/accuracy_reward/group_std_mean": 0.2131967216730118, "signal/accuracy_reward/group_zero_std_frac": 0.37222222685813905, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0796820729970932, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0796820729970932, "signal/advantage_abs_mean": 0.10268108397722245, "signal/advantage_pre_scale_abs_mean": 0.10268108397722245, "signal/advantage_pre_scale_std": 0.1636344462633133, "signal/advantage_std": 0.1636344462633133, "signal/brier_reward/centered_abs_mean": 0.15661969482898713, "signal/brier_reward/group_std_mean": 0.19965132474899291, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015661969408392908, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.015661969408392908, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02808639667928219, "signal/confidence_uniqueness_reward/group_std_mean": 0.04723411276936531, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0028086398728191853, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0028086398728191853, "signal/format_reward/centered_abs_mean": 0.016227213107049464, "signal/format_reward/group_std_mean": 0.033081219717860225, "signal/format_reward/group_zero_std_frac": 0.8583333492279053, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.008113606553524732, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.008113606553524732, "signal/frontier_aurc_reward/centered_abs_mean": 0.00276075005531311, "signal/frontier_aurc_reward/group_std_mean": 0.004973709024488926, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.4509377292124556e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.4509377292124556e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.030482398346066475, "signal/frontier_ece_reward/group_std_mean": 0.039097010344266894, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.003048239927738905, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.003048239927738905, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3309023678302765, "signal/frontier_entropy_batch_reward/group_std_mean": 0.40246840119361876, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03309023603796959, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03309023603796959, "signal/volume_coverage_0/centered_abs_mean": 6.612283298679245e-09, "signal/volume_coverage_0/group_std_mean": 8.463888301335488e-09, "signal/volume_coverage_0/group_zero_std_frac": 0.725000011920929, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 6.612283387497087e-10, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 6.612283387497087e-10, "signal/volume_coverage_1/centered_abs_mean": 6.612283298679245e-09, "signal/volume_coverage_1/group_std_mean": 8.463888301335488e-09, "signal/volume_coverage_1/group_zero_std_frac": 0.725000011920929, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 6.612283387497087e-10, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 6.612283387497087e-10, "signal/volume_coverage_10/centered_abs_mean": 3.15068140821495e-07, "signal/volume_coverage_10/group_std_mean": 3.9909486417855076e-07, "signal/volume_coverage_10/group_zero_std_frac": 0.3861111134290695, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 3.150681564534352e-08, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 3.150681564534352e-08, "signal/volume_coverage_15/centered_abs_mean": 0.000732619600603357, "signal/volume_coverage_15/group_std_mean": 0.0009918127965647728, "signal/volume_coverage_15/group_zero_std_frac": 0.03611111212521791, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 7.326196137000807e-05, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 7.326196137000807e-05, "signal/volume_coverage_20/centered_abs_mean": 0.01917654536664486, "signal/volume_coverage_20/group_std_mean": 0.024732422083616257, "signal/volume_coverage_20/group_zero_std_frac": 0.0, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 0.001917654532007873, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 0.001917654532007873, "signal/volume_coverage_25/centered_abs_mean": 0.06703869476914406, "signal/volume_coverage_25/group_std_mean": 0.08638201355934143, "signal/volume_coverage_25/group_zero_std_frac": 0.0, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.006703869812190533, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 0.006703869812190533, "signal/volume_coverage_5/centered_abs_mean": 3.047114027765474e-08, "signal/volume_coverage_5/group_std_mean": 3.937505539397535e-08, "signal/volume_coverage_5/group_zero_std_frac": 0.6055555582046509, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 3.047113938947632e-09, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 3.047113938947632e-09, "step": 190 }, { "calibration/aurc": 0.18204306394164244, "calibration/batch_distribution_entropy": 0.9729749328000548, "calibration/buffer_distribution_entropy": 0.9885560786006696, "calibration/confidence_entropy": 0.49102789876075714, "calibration/coverage@0%": 0.017981751165717673, "calibration/coverage@1%": 0.017981751165717673, "calibration/coverage@10%": 0.35273791173668567, "calibration/coverage@15%": 0.45626031578387644, "calibration/coverage@20%": 0.6275733840818873, "calibration/coverage@25%": 0.8232062883198724, "calibration/coverage@30%": 0.8748987198185059, "calibration/coverage@5%": 0.07761235802587599, "calibration/ece": 0.1691368534184712, "calibration/mean_confidence": 0.5745053270312936, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01614583333333335, "completions/max_length": 3499.8, "completions/max_terminated_length": 3499.8, "completions/mean_length": 829.2237915039062, "completions/mean_terminated_length": 842.9637573242187, "completions/min_length": 0.0, "completions/min_terminated_length": 260.8, "epoch": 0.46799415007312406, "grad_norm": 0.0003712046018335968, "learning_rate": 3.91566265060241e-07, "loss": -0.0116, "num_tokens": 444753162.0, "reward": 0.9810139179229737, "reward_std": 0.1448762148618698, "rewards/accuracy_reward": 0.6711805582046508, "rewards/brier_reward": 0.7673219919204712, "rewards/confidence_uniqueness_reward": 0.9353937387466431, "rewards/format_reward": 0.9837673664093017, "rewards/frontier_aurc_reward": -0.0024833133444190024, "rewards/frontier_ece_reward": -0.0005498200946021826, "rewards/frontier_entropy_batch_reward": -0.25182714462280276, "rewards/volume_coverage_0": -5.590650969944377e-10, "rewards/volume_coverage_1": -5.590650969944377e-10, "rewards/volume_coverage_10": -2.345436207473739e-08, "rewards/volume_coverage_15": 5.4893085780349794e-05, "rewards/volume_coverage_20": 0.013210531510412692, "rewards/volume_coverage_25": 0.07210558652877808, "rewards/volume_coverage_5": -4.368226069750847e-09, "signal/accuracy_reward/centered_abs_mean": 0.15710720419883728, "signal/accuracy_reward/group_std_mean": 0.2115005522966385, "signal/accuracy_reward/group_zero_std_frac": 0.3833333313465118, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07855360209941864, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07855360209941864, "signal/advantage_abs_mean": 0.10682297945022583, "signal/advantage_pre_scale_abs_mean": 0.10682297945022583, "signal/advantage_pre_scale_std": 0.17181708216667174, "signal/advantage_std": 0.17181708216667174, "signal/brier_reward/centered_abs_mean": 0.15967849493026734, "signal/brier_reward/group_std_mean": 0.20298723876476288, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015967848524451256, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.015967848524451256, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.034313973411917685, "signal/confidence_uniqueness_reward/group_std_mean": 0.0535574808716774, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003431397257372737, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003431397257372737, "signal/format_reward/centered_abs_mean": 0.02379014752805233, "signal/format_reward/group_std_mean": 0.040867094323039053, "signal/format_reward/group_zero_std_frac": 0.8444444537162781, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.011895073764026164, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.011895073764026164, "signal/frontier_aurc_reward/centered_abs_mean": 0.00311872442252934, "signal/frontier_aurc_reward/group_std_mean": 0.00557683790102601, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.8984056300250816e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.8984056300250816e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.02924216091632843, "signal/frontier_ece_reward/group_std_mean": 0.03790022060275078, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00292421611957252, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00292421611957252, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.30970343947410583, "signal/frontier_entropy_batch_reward/group_std_mean": 0.38179963231086733, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.030970345064997674, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.030970345064997674, "signal/volume_coverage_0/centered_abs_mean": 7.757238851269221e-09, "signal/volume_coverage_0/group_std_mean": 1.0284649754055408e-08, "signal/volume_coverage_0/group_zero_std_frac": 0.6805555582046509, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 7.757239073313826e-10, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 7.757239073313826e-10, "signal/volume_coverage_1/centered_abs_mean": 7.757238851269221e-09, "signal/volume_coverage_1/group_std_mean": 1.0284649754055408e-08, "signal/volume_coverage_1/group_zero_std_frac": 0.6805555582046509, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 7.757239073313826e-10, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 7.757239073313826e-10, "signal/volume_coverage_10/centered_abs_mean": 3.1254682539838543e-07, "signal/volume_coverage_10/group_std_mean": 4.123982932924264e-07, "signal/volume_coverage_10/group_zero_std_frac": 0.35277777910232544, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 3.125468204245863e-08, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 3.125468204245863e-08, "signal/volume_coverage_15/centered_abs_mean": 0.0020562252262607216, "signal/volume_coverage_15/group_std_mean": 0.002781915059313178, "signal/volume_coverage_15/group_zero_std_frac": 0.0, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 0.00020562254067044706, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 0.00020562254067044706, "signal/volume_coverage_20/centered_abs_mean": 0.020905856043100357, "signal/volume_coverage_20/group_std_mean": 0.02738172821700573, "signal/volume_coverage_20/group_zero_std_frac": 0.0, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 0.002090585697442293, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 0.002090585697442293, "signal/volume_coverage_25/centered_abs_mean": 0.07110105603933334, "signal/volume_coverage_25/group_std_mean": 0.09250357747077942, "signal/volume_coverage_25/group_zero_std_frac": 0.0, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.00711010554805398, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 0.00711010554805398, "signal/volume_coverage_5/centered_abs_mean": 4.895319065667536e-08, "signal/volume_coverage_5/group_std_mean": 6.477631941947948e-08, "signal/volume_coverage_5/group_zero_std_frac": 0.5333333373069763, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 4.895319172248947e-09, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 4.895319172248947e-09, "step": 195 }, { "calibration/aurc": 0.17619245644327491, "calibration/batch_distribution_entropy": 0.9541203111935174, "calibration/buffer_distribution_entropy": 0.9881714641504811, "calibration/confidence_entropy": 0.507531712440415, "calibration/coverage@0%": 0.013553948344807684, "calibration/coverage@1%": 0.013553948344807684, "calibration/coverage@10%": 0.22703562373747785, "calibration/coverage@15%": 0.37528082427196596, "calibration/coverage@20%": 0.6159289333624963, "calibration/coverage@25%": 0.9096808862433863, "calibration/coverage@30%": 0.9412698412698413, "calibration/coverage@5%": 0.15962724677412704, "calibration/ece": 0.1774361783998174, "calibration/mean_confidence": 0.6063874822733339, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008420138888888862, "completions/max_length": 3395.8, "completions/max_terminated_length": 3395.8, "completions/mean_length": 813.6625854492188, "completions/mean_terminated_length": 820.6150024414062, "completions/min_length": 0.0, "completions/min_terminated_length": 268.2, "epoch": 0.47999400007499904, "grad_norm": 0.00035624156589619815, "learning_rate": 2.409638554216868e-07, "loss": -0.0055, "num_tokens": 457194363.0, "reward": 1.0029555439949036, "reward_std": 0.13360524475574492, "rewards/accuracy_reward": 0.7014756917953491, "rewards/brier_reward": 0.7836914658546448, "rewards/confidence_uniqueness_reward": 0.9424945712089539, "rewards/format_reward": 0.9915798544883728, "rewards/frontier_aurc_reward": -0.0024135842453688384, "rewards/frontier_ece_reward": -0.001841819501714781, "rewards/frontier_entropy_batch_reward": -0.2604458272457123, "rewards/volume_coverage_0": -2.4506557894099502e-11, "rewards/volume_coverage_1": -2.4506557894099502e-11, "rewards/volume_coverage_10": 1.3166838286338133e-09, "rewards/volume_coverage_15": 0.00029627673065988345, "rewards/volume_coverage_20": 0.016802585497498513, "rewards/volume_coverage_25": 0.0835825502872467, "rewards/volume_coverage_5": 1.605325672393576e-09, "signal/accuracy_reward/centered_abs_mean": 0.14759657382965088, "signal/accuracy_reward/group_std_mean": 0.19293810725212096, "signal/accuracy_reward/group_zero_std_frac": 0.4555555582046509, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07379828691482544, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07379828691482544, "signal/advantage_abs_mean": 0.10026623159646988, "signal/advantage_pre_scale_abs_mean": 0.10026623159646988, "signal/advantage_pre_scale_std": 0.15953322649002075, "signal/advantage_std": 0.15953322649002075, "signal/brier_reward/centered_abs_mean": 0.15002098083496093, "signal/brier_reward/group_std_mean": 0.1909335136413574, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015002098679542542, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.015002098679542542, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.025808610394597052, "signal/confidence_uniqueness_reward/group_std_mean": 0.04277213215827942, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002580861235037446, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002580861235037446, "signal/format_reward/centered_abs_mean": 0.014534505270421504, "signal/format_reward/group_std_mean": 0.029166242480278014, "signal/format_reward/group_zero_std_frac": 0.875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.007267252635210752, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.007267252635210752, "signal/frontier_aurc_reward/centered_abs_mean": 0.0030365238897502424, "signal/frontier_aurc_reward/group_std_mean": 0.005425933655351401, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.795655138674192e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.795655138674192e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.02757507599890232, "signal/frontier_ece_reward/group_std_mean": 0.03539729043841362, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002757507748901844, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002757507748901844, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31177434921264646, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3830597996711731, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03117743618786335, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03117743618786335, "signal/volume_coverage_0/centered_abs_mean": 5.777263822892564e-09, "signal/volume_coverage_0/group_std_mean": 7.668372603575335e-09, "signal/volume_coverage_0/group_zero_std_frac": 0.7388888955116272, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 5.777263667461341e-10, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 5.777263667461341e-10, "signal/volume_coverage_1/centered_abs_mean": 5.777263822892564e-09, "signal/volume_coverage_1/group_std_mean": 7.668372603575335e-09, "signal/volume_coverage_1/group_zero_std_frac": 0.7388888955116272, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 5.777263667461341e-10, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 5.777263667461341e-10, "signal/volume_coverage_10/centered_abs_mean": 2.574304829749963e-07, "signal/volume_coverage_10/group_std_mean": 3.39649250236107e-07, "signal/volume_coverage_10/group_zero_std_frac": 0.31944445371627805, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 2.5743048936988088e-08, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 2.5743048936988088e-08, "signal/volume_coverage_15/centered_abs_mean": 0.0032336109317839144, "signal/volume_coverage_15/group_std_mean": 0.004335348587483167, "signal/volume_coverage_15/group_zero_std_frac": 0.0, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 0.00032336109434254466, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 0.00032336109434254466, "signal/volume_coverage_20/centered_abs_mean": 0.022926723957061766, "signal/volume_coverage_20/group_std_mean": 0.0293125681579113, "signal/volume_coverage_20/group_zero_std_frac": 0.0, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 0.00229267249815166, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 0.00229267249815166, "signal/volume_coverage_25/centered_abs_mean": 0.07852394431829453, "signal/volume_coverage_25/group_std_mean": 0.10077835321426391, "signal/volume_coverage_25/group_zero_std_frac": 0.0, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.007852394692599773, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 0.007852394692599773, "signal/volume_coverage_5/centered_abs_mean": 5.1238904319461655e-08, "signal/volume_coverage_5/group_std_mean": 6.800436693765733e-08, "signal/volume_coverage_5/group_zero_std_frac": 0.4333333373069763, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 5.123890467473302e-09, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 5.123890467473302e-09, "step": 200 }, { "epoch": 0.47999400007499904, "eval_calibration/aurc": 0.1729246337459238, "eval_calibration/batch_distribution_entropy": 0.908734295978599, "eval_calibration/buffer_distribution_entropy": 0.9873650506140915, "eval_calibration/confidence_entropy": 0.4987768570492685, "eval_calibration/coverage@0%": 0.15104166666666666, "eval_calibration/coverage@1%": 0.15104166666666666, "eval_calibration/coverage@10%": 0.375, "eval_calibration/coverage@15%": 0.5677083333333334, "eval_calibration/coverage@20%": 0.7916666666666666, "eval_calibration/coverage@25%": 0.9322916666666666, "eval_calibration/coverage@30%": 0.96875, "eval_calibration/coverage@5%": 0.21875, "eval_calibration/ece": 0.25312968750000003, "eval_calibration/mean_confidence": 0.5731484375, "eval_completions/clipped_ratio": 0.008680555555555561, "eval_completions/max_length": 2195.6666666666665, "eval_completions/max_terminated_length": 2195.6666666666665, "eval_completions/mean_length": 804.9442952473959, "eval_completions/mean_terminated_length": 812.1565144856771, "eval_completions/min_length": 130.66666666666666, "eval_completions/min_terminated_length": 333.0, "eval_loss": 0.0, "eval_num_tokens": 457194363.0, "eval_reward": 0.9083906412124634, "eval_reward_std": 0.260540634393692, "eval_rewards/accuracy_reward": 0.6718749900658926, "eval_rewards/brier_reward": 0.7771714429060618, "eval_rewards/confidence_uniqueness_reward": 0.8881837427616119, "eval_rewards/format_reward": 0.9904513955116272, "eval_rewards/frontier_aurc_reward": -0.002305791092415651, "eval_rewards/frontier_ece_reward": -0.00047329728355786454, "eval_rewards/frontier_entropy_batch_reward": -0.9904513955116272, "eval_rewards/volume_coverage_0": 2.7870893556010873e-10, "eval_rewards/volume_coverage_1": 2.7870893556010873e-10, "eval_rewards/volume_coverage_10": 2.718800632776445e-08, "eval_rewards/volume_coverage_15": 0.0007767819847686042, "eval_rewards/volume_coverage_20": 0.017239811054120462, "eval_rewards/volume_coverage_25": 0.08011540894707044, "eval_rewards/volume_coverage_5": 2.873590402667029e-09, "eval_runtime": 186.5897, "eval_samples_per_second": 5.359, "eval_signal/accuracy_reward/centered_abs_mean": 0.4265408019224803, "eval_signal/accuracy_reward/group_std_mean": 0.46809791525204975, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.21327040096124014, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.21327040096124014, "eval_signal/advantage_abs_mean": 0.23001150538523993, "eval_signal/advantage_pre_scale_abs_mean": 0.23001150538523993, "eval_signal/advantage_pre_scale_std": 0.2585917264223099, "eval_signal/advantage_std": 0.2585917264223099, "eval_signal/brier_reward/centered_abs_mean": 0.1993307818969091, "eval_signal/brier_reward/group_std_mean": 0.2538089131315549, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019933079058925312, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.019933079058925312, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.051736210783322654, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.0815204003204902, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0051736211171373725, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0051736211171373725, "eval_signal/format_reward/centered_abs_mean": 0.018391927083333332, "eval_signal/format_reward/group_std_mean": 0.051025692373514175, "eval_signal/format_reward/group_zero_std_frac": 0.722222238779068, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.009195963541666666, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.009195963541666666, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.003810435184277594, "eval_signal/frontier_aurc_reward/group_std_mean": 0.007476490068559845, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.763044307765085e-05, "eval_signal/frontier_aurc_reward/weight": 0.012500000186264515, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.763044307765085e-05, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.036483422542611756, "eval_signal/frontier_ece_reward/group_std_mean": 0.050309122850497566, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0036483421766509614, "eval_signal/frontier_ece_reward/weight": 0.10000000149011612, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0036483421766509614, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.018391927083333332, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.051025692373514175, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.722222238779068, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0018391927975850801, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0018391927975850801, "eval_signal/volume_coverage_0/centered_abs_mean": 7.0012612004196244e-09, "eval_signal/volume_coverage_0/group_std_mean": 9.86357751031619e-09, "eval_signal/volume_coverage_0/group_zero_std_frac": 0.6666666865348816, "eval_signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 7.00126142246423e-10, "eval_signal/volume_coverage_0/weight": 0.10000000149011612, "eval_signal/volume_coverage_0/weighted_centered_abs_mean": 7.00126142246423e-10, "eval_signal/volume_coverage_1/centered_abs_mean": 7.0012612004196244e-09, "eval_signal/volume_coverage_1/group_std_mean": 9.86357751031619e-09, "eval_signal/volume_coverage_1/group_zero_std_frac": 0.6666666865348816, "eval_signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 7.00126142246423e-10, "eval_signal/volume_coverage_1/weight": 0.10000000149011612, "eval_signal/volume_coverage_1/weighted_centered_abs_mean": 7.00126142246423e-10, "eval_signal/volume_coverage_10/centered_abs_mean": 4.954252356507519e-07, "eval_signal/volume_coverage_10/group_std_mean": 6.970840994805864e-07, "eval_signal/volume_coverage_10/group_zero_std_frac": 0.2777777860562007, "eval_signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 4.954252534143203e-08, "eval_signal/volume_coverage_10/weight": 0.10000000149011612, "eval_signal/volume_coverage_10/weighted_centered_abs_mean": 4.954252534143203e-08, "eval_signal/volume_coverage_15/centered_abs_mean": 0.005268752574920654, "eval_signal/volume_coverage_15/group_std_mean": 0.00819743393609921, "eval_signal/volume_coverage_15/group_zero_std_frac": 0.0, "eval_signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 0.0005268752429401502, "eval_signal/volume_coverage_15/weight": 0.10000000149011612, "eval_signal/volume_coverage_15/weighted_centered_abs_mean": 0.0005268752429401502, "eval_signal/volume_coverage_20/centered_abs_mean": 0.031609114880363144, "eval_signal/volume_coverage_20/group_std_mean": 0.04171175882220268, "eval_signal/volume_coverage_20/group_zero_std_frac": 0.0, "eval_signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 0.003160911495797336, "eval_signal/volume_coverage_20/weight": 0.10000000149011612, "eval_signal/volume_coverage_20/weighted_centered_abs_mean": 0.003160911495797336, "eval_signal/volume_coverage_25/centered_abs_mean": 0.12539048989613852, "eval_signal/volume_coverage_25/group_std_mean": 0.1601824959119161, "eval_signal/volume_coverage_25/group_zero_std_frac": 0.0, "eval_signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.012539049455275139, "eval_signal/volume_coverage_25/weight": 0.10000000149011612, "eval_signal/volume_coverage_25/weighted_centered_abs_mean": 0.012539049455275139, "eval_signal/volume_coverage_5/centered_abs_mean": 7.264358714564878e-08, "eval_signal/volume_coverage_5/group_std_mean": 1.0225969523010765e-07, "eval_signal/volume_coverage_5/group_zero_std_frac": 0.3333333432674408, "eval_signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 7.26435896621543e-09, "eval_signal/volume_coverage_5/weight": 0.10000000149011612, "eval_signal/volume_coverage_5/weighted_centered_abs_mean": 7.26435896621543e-09, "eval_steps_per_second": 0.032, "step": 200 }, { "calibration/aurc": 0.16806944583325462, "calibration/batch_distribution_entropy": 0.9493498137992444, "calibration/buffer_distribution_entropy": 0.9872563989166057, "calibration/confidence_entropy": 0.4816480546128011, "calibration/coverage@0%": 0.020008968258164346, "calibration/coverage@1%": 0.020008968258164346, "calibration/coverage@10%": 0.20190446255225694, "calibration/coverage@15%": 0.5117778930717993, "calibration/coverage@20%": 0.8431347047800399, "calibration/coverage@25%": 0.9027516403741449, "calibration/coverage@30%": 0.9512983386849087, "calibration/coverage@5%": 0.023683508940579044, "calibration/ece": 0.1584036925844378, "calibration/mean_confidence": 0.6165435627133257, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011979166666666674, "completions/max_length": 3255.8, "completions/max_terminated_length": 3255.8, "completions/mean_length": 825.996875, "completions/mean_terminated_length": 836.0012451171875, "completions/min_length": 0.0, "completions/min_terminated_length": 253.2, "epoch": 0.491993850076874, "grad_norm": 0.0002999586286023259, "learning_rate": 9.036144578313253e-08, "loss": -0.0083, "num_tokens": 469775799.0, "reward": 1.0257537722587586, "reward_std": 0.136698442697525, "rewards/accuracy_reward": 0.7496527791023254, "rewards/brier_reward": 0.7866234183311462, "rewards/confidence_uniqueness_reward": 0.9380228757858277, "rewards/format_reward": 0.9880208253860474, "rewards/frontier_aurc_reward": -0.0017146203899756074, "rewards/frontier_ece_reward": -0.005589825892820954, "rewards/frontier_entropy_batch_reward": -0.2817916065454483, "rewards/volume_coverage_0": -1.4832471251224888e-09, "rewards/volume_coverage_1": -1.4832471251224888e-09, "rewards/volume_coverage_10": -1.1369402130867456e-07, "rewards/volume_coverage_15": 0.00047319423174485564, "rewards/volume_coverage_20": 0.0234893973916769, "rewards/volume_coverage_25": 0.10815636962652206, "rewards/volume_coverage_5": -1.2138094973579427e-08, "signal/accuracy_reward/centered_abs_mean": 0.14657118022441865, "signal/accuracy_reward/group_std_mean": 0.2037561982870102, "signal/accuracy_reward/group_zero_std_frac": 0.38611111640930174, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07328559011220932, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07328559011220932, "signal/advantage_abs_mean": 0.09813274145126342, "signal/advantage_pre_scale_abs_mean": 0.09813274145126342, "signal/advantage_pre_scale_std": 0.1621260464191437, "signal/advantage_std": 0.1621260464191437, "signal/brier_reward/centered_abs_mean": 0.14790882170200348, "signal/brier_reward/group_std_mean": 0.18837517201900483, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014790883474051952, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014790883474051952, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.030224530026316643, "signal/confidence_uniqueness_reward/group_std_mean": 0.04915469288825989, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003022453049197793, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003022453049197793, "signal/format_reward/centered_abs_mean": 0.01829427070915699, "signal/format_reward/group_std_mean": 0.034758422523736954, "signal/format_reward/group_zero_std_frac": 0.8555555582046509, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.009147135354578496, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.009147135354578496, "signal/frontier_aurc_reward/centered_abs_mean": 0.0022363578900694847, "signal/frontier_aurc_reward/group_std_mean": 0.004022621084004641, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.79544747172622e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.79544747172622e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.02730635106563568, "signal/frontier_ece_reward/group_std_mean": 0.03513160794973373, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0027306349482387306, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0027306349482387306, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33183044791221616, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4021769523620605, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03318304568529129, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03318304568529129, "signal/volume_coverage_0/centered_abs_mean": 1.1628204266145303e-08, "signal/volume_coverage_0/group_std_mean": 1.5069850789473095e-08, "signal/volume_coverage_0/group_zero_std_frac": 0.7361111044883728, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 1.1628204399372067e-09, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 1.1628204399372067e-09, "signal/volume_coverage_1/centered_abs_mean": 1.1628204266145303e-08, "signal/volume_coverage_1/group_std_mean": 1.5069850789473095e-08, "signal/volume_coverage_1/group_zero_std_frac": 0.7361111044883728, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 1.1628204399372067e-09, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 1.1628204399372067e-09, "signal/volume_coverage_10/centered_abs_mean": 3.7749450711999086e-07, "signal/volume_coverage_10/group_std_mean": 5.002540035548009e-07, "signal/volume_coverage_10/group_zero_std_frac": 0.3416666805744171, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 3.7749452275193105e-08, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 3.7749452275193105e-08, "signal/volume_coverage_15/centered_abs_mean": 0.004773548245429993, "signal/volume_coverage_15/group_std_mean": 0.006471954379230737, "signal/volume_coverage_15/group_zero_std_frac": 0.0, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 0.0004773548396769911, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 0.0004773548396769911, "signal/volume_coverage_20/centered_abs_mean": 0.0273191150277853, "signal/volume_coverage_20/group_std_mean": 0.03483609855175018, "signal/volume_coverage_20/group_zero_std_frac": 0.0, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 0.0027319115120917558, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 0.0027319115120917558, "signal/volume_coverage_25/centered_abs_mean": 0.08157561272382736, "signal/volume_coverage_25/group_std_mean": 0.10533483922481537, "signal/volume_coverage_25/group_zero_std_frac": 0.0, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.008157561719417571, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 0.008157561719417571, "signal/volume_coverage_5/centered_abs_mean": 7.86729152935095e-08, "signal/volume_coverage_5/group_std_mean": 1.0281165501169198e-07, "signal/volume_coverage_5/group_zero_std_frac": 0.4361111164093018, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 7.867291351715266e-09, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 7.867291351715266e-09, "step": 205 }, { "calibration/aurc": 0.10158368325560434, "calibration/batch_distribution_entropy": 0.9546674144846191, "calibration/buffer_distribution_entropy": 0.9870036950060173, "calibration/confidence_entropy": 0.49870306686443894, "calibration/coverage@0%": 0.0479565659189188, "calibration/coverage@1%": 0.0479565659189188, "calibration/coverage@10%": 0.5728609899799051, "calibration/coverage@15%": 0.7681936033609923, "calibration/coverage@20%": 0.886773839304836, "calibration/coverage@25%": 0.9677753019926273, "calibration/coverage@30%": 1.0, "calibration/coverage@5%": 0.3374138221717951, "calibration/ece": 0.14664376764987133, "calibration/mean_confidence": 0.6115497295637312, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008969907407407404, "completions/max_length": 3451.0, "completions/max_terminated_length": 3451.0, "completions/mean_length": 820.9849446614584, "completions/mean_terminated_length": 828.4159952799479, "completions/min_length": 0.0, "completions/min_terminated_length": 248.33333333333334, "epoch": 0.49919376007799904, "num_tokens": 477316495.0, "reward": 1.004579246044159, "reward_std": 0.13553029050429663, "rewards/accuracy_reward": 0.6993634303410848, "rewards/brier_reward": 0.7872058550516764, "rewards/confidence_uniqueness_reward": 0.941747784614563, "rewards/format_reward": 0.9910300970077515, "rewards/frontier_aurc_reward": -0.0019426853007947404, "rewards/frontier_ece_reward": -0.0012536543266226847, "rewards/frontier_entropy_batch_reward": -0.26403993864854175, "rewards/volume_coverage_0": -6.887284076384503e-10, "rewards/volume_coverage_1": -6.887284076384503e-10, "rewards/volume_coverage_10": -5.683458681460252e-08, "rewards/volume_coverage_15": 0.0013588267417314153, "rewards/volume_coverage_20": 0.025538019835948944, "rewards/volume_coverage_25": 0.10351060579220454, "rewards/volume_coverage_5": -7.1533599784364315e-09, "signal/accuracy_reward/centered_abs_mean": 0.1471534992257754, "signal/accuracy_reward/group_std_mean": 0.19441807766755423, "signal/accuracy_reward/group_zero_std_frac": 0.44907407959302265, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0735767496128877, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0735767496128877, "signal/advantage_abs_mean": 0.09917695571978886, "signal/advantage_pre_scale_abs_mean": 0.09917695571978886, "signal/advantage_pre_scale_std": 0.16188943882783255, "signal/advantage_std": 0.16188943882783255, "signal/brier_reward/centered_abs_mean": 0.14869935313860574, "signal/brier_reward/group_std_mean": 0.18970499436060587, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014869935810565948, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014869935810565948, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02755103384455045, "signal/confidence_uniqueness_reward/group_std_mean": 0.04898699869712194, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0027551034775873027, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0027551034775873027, "signal/format_reward/centered_abs_mean": 0.016511140080789726, "signal/format_reward/group_std_mean": 0.03586030130585035, "signal/format_reward/group_zero_std_frac": 0.8379629651705424, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.008255570040394863, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.008255570040394863, "signal/frontier_aurc_reward/centered_abs_mean": 0.0025833341448257365, "signal/frontier_aurc_reward/group_std_mean": 0.004829682254542907, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.229167790171535e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.229167790171535e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.027258147795995075, "signal/frontier_ece_reward/group_std_mean": 0.03482848281661669, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002725814857209722, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002725814857209722, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3093116283416748, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3818712929884593, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.030931161095698673, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.030931161095698673, "signal/volume_coverage_0/centered_abs_mean": 6.647302116154681e-09, "signal/volume_coverage_0/group_std_mean": 8.50487754296599e-09, "signal/volume_coverage_0/group_zero_std_frac": 0.7314814925193787, "signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 6.647302153162116e-10, "signal/volume_coverage_0/weight": 0.10000000149011612, "signal/volume_coverage_0/weighted_centered_abs_mean": 6.647302153162116e-10, "signal/volume_coverage_1/centered_abs_mean": 6.647302116154681e-09, "signal/volume_coverage_1/group_std_mean": 8.50487754296599e-09, "signal/volume_coverage_1/group_zero_std_frac": 0.7314814925193787, "signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 6.647302153162116e-10, "signal/volume_coverage_1/weight": 0.10000000149011612, "signal/volume_coverage_1/weighted_centered_abs_mean": 6.647302153162116e-10, "signal/volume_coverage_10/centered_abs_mean": 3.1564475951502874e-07, "signal/volume_coverage_10/group_std_mean": 4.0169316169643327e-07, "signal/volume_coverage_10/group_zero_std_frac": 0.34259260694185895, "signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 3.156447606992666e-08, "signal/volume_coverage_10/weight": 0.10000000149011612, "signal/volume_coverage_10/weighted_centered_abs_mean": 3.156447606992666e-08, "signal/volume_coverage_15/centered_abs_mean": 0.006117678868273894, "signal/volume_coverage_15/group_std_mean": 0.007960582462449869, "signal/volume_coverage_15/group_zero_std_frac": 0.0, "signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 0.0006117678907079002, "signal/volume_coverage_15/weight": 0.10000000149011612, "signal/volume_coverage_15/weighted_centered_abs_mean": 0.0006117678907079002, "signal/volume_coverage_20/centered_abs_mean": 0.02946065676709016, "signal/volume_coverage_20/group_std_mean": 0.03762132550279299, "signal/volume_coverage_20/group_zero_std_frac": 0.0, "signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 0.0029460658940176168, "signal/volume_coverage_20/weight": 0.10000000149011612, "signal/volume_coverage_20/weighted_centered_abs_mean": 0.0029460658940176168, "signal/volume_coverage_25/centered_abs_mean": 0.08665728569030762, "signal/volume_coverage_25/group_std_mean": 0.11248831450939178, "signal/volume_coverage_25/group_zero_std_frac": 0.0, "signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.008665728693207106, "signal/volume_coverage_25/weight": 0.10000000149011612, "signal/volume_coverage_25/weighted_centered_abs_mean": 0.008665728693207106, "signal/volume_coverage_5/centered_abs_mean": 4.9763441959764045e-08, "signal/volume_coverage_5/group_std_mean": 6.365335754784003e-08, "signal/volume_coverage_5/group_zero_std_frac": 0.41203704476356506, "signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 4.976344373612089e-09, "signal/volume_coverage_5/weight": 0.10000000149011612, "signal/volume_coverage_5/weighted_centered_abs_mean": 4.976344373612089e-09, "step": 208, "total_flos": 0.0, "train_loss": -0.008794238732662052, "train_runtime": 40876.3537, "train_samples_per_second": 0.367, "train_steps_per_second": 0.005 } ], "logging_steps": 5, "max_steps": 208, "num_input_tokens_seen": 477316495, "num_train_epochs": 1, "save_steps": 60, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 6, "trial_name": null, "trial_params": null }