{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.49919376007799904, "eval_steps": 50, "global_step": 208, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "calibration/aurc": 0.5100526402040415, "calibration/batch_distribution_entropy": 0.29006821757016016, "calibration/confidence_entropy": 0.2233175770524201, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.47929644203736255, "calibration/mean_confidence": 0.9111617651712166, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.018142361111111116, "completions/max_length": 4043.0, "completions/max_terminated_length": 4043.0, "completions/mean_length": 515.587939453125, "completions/mean_terminated_length": 525.1046997070313, "completions/min_length": 0.0, "completions/min_terminated_length": 2.8, "epoch": 0.011999850001874977, "grad_norm": 0.005509195849299431, "learning_rate": 5.952380952380953e-07, "loss": 0.0067, "num_tokens": 9053781.0, "reward": 0.6579087376594543, "reward_std": 0.6609244465827941, "rewards/accgated_coverage_0": 0.2749160468578339, "rewards/accgated_coverage_1": 0.2749160468578339, "rewards/accgated_coverage_10": 0.2749160468578339, "rewards/accgated_coverage_15": 0.2749160468578339, "rewards/accgated_coverage_20": 0.2749160468578339, "rewards/accgated_coverage_25": 0.2749160468578339, "rewards/accgated_coverage_5": 0.2749160468578339, "rewards/accuracy_reward": 0.2602430522441864, "rewards/brier_reward": 0.3124499797821045, "rewards/confidence_uniqueness_reward": 0.2885975897312164, "rewards/format_reward": 0.6042534708976746, "rewards/frontier_aurc_reward": 0.2749160468578339, "rewards/frontier_ece_reward": 0.2749160468578339, "rewards/frontier_entropy_batch_reward": -0.5781359553337098, "signal/accgated_coverage_0/centered_abs_mean": 0.3091569423675537, "signal/accgated_coverage_0/group_std_mean": 0.36718695163726806, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.030915693566203117, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.030915693566203117, "signal/accgated_coverage_1/centered_abs_mean": 0.3091569423675537, "signal/accgated_coverage_1/group_std_mean": 0.36718695163726806, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.030915693566203117, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.030915693566203117, "signal/accgated_coverage_10/centered_abs_mean": 0.3091569423675537, "signal/accgated_coverage_10/group_std_mean": 0.36718695163726806, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.030915693566203117, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.030915693566203117, "signal/accgated_coverage_15/centered_abs_mean": 0.3091569423675537, "signal/accgated_coverage_15/group_std_mean": 0.36718695163726806, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.030915693566203117, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.030915693566203117, "signal/accgated_coverage_20/centered_abs_mean": 0.3091569423675537, "signal/accgated_coverage_20/group_std_mean": 0.36718695163726806, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.030915693566203117, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.030915693566203117, "signal/accgated_coverage_25/centered_abs_mean": 0.3091569423675537, "signal/accgated_coverage_25/group_std_mean": 0.36718695163726806, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.030915693566203117, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.030915693566203117, "signal/accgated_coverage_5/centered_abs_mean": 0.3091569423675537, "signal/accgated_coverage_5/group_std_mean": 0.36718695163726806, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.030915693566203117, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.030915693566203117, "signal/accuracy_reward/centered_abs_mean": 0.30717231035232545, "signal/accuracy_reward/group_std_mean": 0.3660562574863434, "signal/accuracy_reward/group_zero_std_frac": 0.10000000074505806, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.15358615517616273, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.15358615517616273, "signal/advantage_abs_mean": 0.5620251536369324, "signal/advantage_pre_scale_abs_mean": 0.5620251536369324, "signal/advantage_pre_scale_std": 0.6785519242286682, "signal/advantage_std": 0.6785519242286682, "signal/brier_reward/centered_abs_mean": 0.3185951590538025, "signal/brier_reward/group_std_mean": 0.3719545781612396, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03185951597988605, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.03185951597988605, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.2353891611099243, "signal/confidence_uniqueness_reward/group_std_mean": 0.28763567209243773, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.023538917675614356, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.023538917675614356, "signal/format_reward/centered_abs_mean": 0.4384060263633728, "signal/format_reward/group_std_mean": 0.4737804293632507, "signal/format_reward/group_zero_std_frac": 0.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.2192030131816864, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.2192030131816864, "signal/frontier_aurc_reward/centered_abs_mean": 0.3091569423675537, "signal/frontier_aurc_reward/group_std_mean": 0.36718695163726806, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0038644616957753896, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0038644616957753896, "signal/frontier_ece_reward/centered_abs_mean": 0.3091569423675537, "signal/frontier_ece_reward/group_std_mean": 0.36718695163726806, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.030915693566203117, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.030915693566203117, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.4478966951370239, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4815566301345825, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.044789671897888184, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.044789671897888184, "step": 5 }, { "calibration/aurc": 0.5252471496664904, "calibration/batch_distribution_entropy": 0.25438653899177793, "calibration/confidence_entropy": 0.22173958000055646, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.471072254833574, "calibration/mean_confidence": 0.9203944885303501, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016232638888888908, "completions/max_length": 4008.4, "completions/max_terminated_length": 4008.4, "completions/mean_length": 482.696630859375, "completions/mean_terminated_length": 490.9005981445313, "completions/min_length": 0.0, "completions/min_terminated_length": 7.8, "epoch": 0.023999700003749954, "grad_norm": 0.004243232775479555, "learning_rate": 1.1904761904761906e-06, "loss": 0.0057, "num_tokens": 17697166.0, "reward": 0.7655806422233582, "reward_std": 0.6458010196685791, "rewards/accgated_coverage_0": 0.3147114455699921, "rewards/accgated_coverage_1": 0.3147114455699921, "rewards/accgated_coverage_10": 0.3147114455699921, "rewards/accgated_coverage_15": 0.3147114455699921, "rewards/accgated_coverage_20": 0.3147114455699921, "rewards/accgated_coverage_25": 0.3147114455699921, "rewards/accgated_coverage_5": 0.3147114455699921, "rewards/accuracy_reward": 0.2984375, "rewards/brier_reward": 0.3611126124858856, "rewards/confidence_uniqueness_reward": 0.3588679790496826, "rewards/format_reward": 0.7130208492279053, "rewards/frontier_aurc_reward": 0.3147114455699921, "rewards/frontier_ece_reward": 0.3147114455699921, "rewards/frontier_entropy_batch_reward": -0.6784967422485352, "signal/accgated_coverage_0/centered_abs_mean": 0.3180821776390076, "signal/accgated_coverage_0/group_std_mean": 0.37312147617340086, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.03180821873247623, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.03180821873247623, "signal/accgated_coverage_1/centered_abs_mean": 0.3180821776390076, "signal/accgated_coverage_1/group_std_mean": 0.37312147617340086, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.03180821873247623, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.03180821873247623, "signal/accgated_coverage_10/centered_abs_mean": 0.3180821776390076, "signal/accgated_coverage_10/group_std_mean": 0.37312147617340086, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.03180821873247623, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.03180821873247623, "signal/accgated_coverage_15/centered_abs_mean": 0.3180821776390076, "signal/accgated_coverage_15/group_std_mean": 0.37312147617340086, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.03180821873247623, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.03180821873247623, "signal/accgated_coverage_20/centered_abs_mean": 0.3180821776390076, "signal/accgated_coverage_20/group_std_mean": 0.37312147617340086, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.03180821873247623, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.03180821873247623, "signal/accgated_coverage_25/centered_abs_mean": 0.3180821776390076, "signal/accgated_coverage_25/group_std_mean": 0.37312147617340086, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.03180821873247623, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.03180821873247623, "signal/accgated_coverage_5/centered_abs_mean": 0.3180821776390076, "signal/accgated_coverage_5/group_std_mean": 0.37312147617340086, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.03180821873247623, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.03180821873247623, "signal/accuracy_reward/centered_abs_mean": 0.3222547709941864, "signal/accuracy_reward/group_std_mean": 0.3800747275352478, "signal/accuracy_reward/group_zero_std_frac": 0.0777777798473835, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1611273854970932, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.1611273854970932, "signal/advantage_abs_mean": 0.5453694581985473, "signal/advantage_pre_scale_abs_mean": 0.5453694581985473, "signal/advantage_pre_scale_std": 0.6612506985664368, "signal/advantage_std": 0.6612506985664368, "signal/brier_reward/centered_abs_mean": 0.3204429686069489, "signal/brier_reward/group_std_mean": 0.3723629653453827, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03204429745674133, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.03204429745674133, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.22323725521564483, "signal/confidence_uniqueness_reward/group_std_mean": 0.28084011673927306, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.022323725372552873, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.022323725372552873, "signal/format_reward/centered_abs_mean": 0.3584092855453491, "signal/format_reward/group_std_mean": 0.42185020446777344, "signal/format_reward/group_zero_std_frac": 0.00555555559694767, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.17920464277267456, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.17920464277267456, "signal/frontier_aurc_reward/centered_abs_mean": 0.3180821776390076, "signal/frontier_aurc_reward/group_std_mean": 0.37312147617340086, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.003976027341559529, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.003976027341559529, "signal/frontier_ece_reward/centered_abs_mean": 0.3180821776390076, "signal/frontier_ece_reward/group_std_mean": 0.37312147617340086, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.03180821873247623, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.03180821873247623, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3863869488239288, "signal/frontier_entropy_batch_reward/group_std_mean": 0.44503648281097413, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03863869607448578, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03863869607448578, "step": 10 }, { "calibration/aurc": 0.5362176870303669, "calibration/batch_distribution_entropy": 0.31565510576164746, "calibration/confidence_entropy": 0.24680094421113, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.4811362427740694, "calibration/mean_confidence": 0.9068744050179488, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010069444444444464, "completions/max_length": 3983.8, "completions/max_terminated_length": 3983.8, "completions/mean_length": 456.1573852539062, "completions/mean_terminated_length": 460.8325134277344, "completions/min_length": 0.0, "completions/min_terminated_length": 55.2, "epoch": 0.03599955000562493, "grad_norm": 0.0021033829543739557, "learning_rate": 1.7857142857142859e-06, "loss": -0.0113, "num_tokens": 26054083.0, "reward": 0.9580986380577088, "reward_std": 0.555222624540329, "rewards/accgated_coverage_0": 0.37856481671333314, "rewards/accgated_coverage_1": 0.37856481671333314, "rewards/accgated_coverage_10": 0.37856481671333314, "rewards/accgated_coverage_15": 0.37856481671333314, "rewards/accgated_coverage_20": 0.37856481671333314, "rewards/accgated_coverage_25": 0.37856481671333314, "rewards/accgated_coverage_5": 0.37856481671333314, "rewards/accuracy_reward": 0.3509548604488373, "rewards/brier_reward": 0.4531111419200897, "rewards/confidence_uniqueness_reward": 0.507493644952774, "rewards/format_reward": 0.9373263835906982, "rewards/frontier_aurc_reward": 0.37856481671333314, "rewards/frontier_ece_reward": 0.37856481671333314, "rewards/frontier_entropy_batch_reward": -0.8968637108802795, "signal/accgated_coverage_0/centered_abs_mean": 0.3092223465442657, "signal/accgated_coverage_0/group_std_mean": 0.3651528418064117, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.030922235921025276, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.030922235921025276, "signal/accgated_coverage_1/centered_abs_mean": 0.3092223465442657, "signal/accgated_coverage_1/group_std_mean": 0.3651528418064117, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.030922235921025276, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.030922235921025276, "signal/accgated_coverage_10/centered_abs_mean": 0.3092223465442657, "signal/accgated_coverage_10/group_std_mean": 0.3651528418064117, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.030922235921025276, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.030922235921025276, "signal/accgated_coverage_15/centered_abs_mean": 0.3092223465442657, "signal/accgated_coverage_15/group_std_mean": 0.3651528418064117, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.030922235921025276, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.030922235921025276, "signal/accgated_coverage_20/centered_abs_mean": 0.3092223465442657, "signal/accgated_coverage_20/group_std_mean": 0.3651528418064117, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.030922235921025276, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.030922235921025276, "signal/accgated_coverage_25/centered_abs_mean": 0.3092223465442657, "signal/accgated_coverage_25/group_std_mean": 0.3651528418064117, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.030922235921025276, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.030922235921025276, "signal/accgated_coverage_5/centered_abs_mean": 0.3092223465442657, "signal/accgated_coverage_5/group_std_mean": 0.3651528418064117, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.030922235921025276, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.030922235921025276, "signal/accuracy_reward/centered_abs_mean": 0.31787651777267456, "signal/accuracy_reward/group_std_mean": 0.37809754610061647, "signal/accuracy_reward/group_zero_std_frac": 0.07500000149011612, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.15893825888633728, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.15893825888633728, "signal/advantage_abs_mean": 0.46249008774757383, "signal/advantage_pre_scale_abs_mean": 0.46249008774757383, "signal/advantage_pre_scale_std": 0.5720869541168213, "signal/advantage_std": 0.5720869541168213, "signal/brier_reward/centered_abs_mean": 0.2960561692714691, "signal/brier_reward/group_std_mean": 0.3486612796783447, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.029605618491768838, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.029605618491768838, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.1899004429578781, "signal/confidence_uniqueness_reward/group_std_mean": 0.23581856489181519, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.01899004392325878, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.01899004392325878, "signal/format_reward/centered_abs_mean": 0.10666232705116271, "signal/format_reward/group_std_mean": 0.1854827418923378, "signal/format_reward/group_zero_std_frac": 0.3083333432674408, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.05333116352558136, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.05333116352558136, "signal/frontier_aurc_reward/centered_abs_mean": 0.3092223465442657, "signal/frontier_aurc_reward/group_std_mean": 0.3651528418064117, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0038652794901281595, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0038652794901281595, "signal/frontier_ece_reward/centered_abs_mean": 0.3092223465442657, "signal/frontier_ece_reward/group_std_mean": 0.3651528418064117, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.030922235921025276, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.030922235921025276, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.17015037536621094, "signal/frontier_entropy_batch_reward/group_std_mean": 0.27351958155632017, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.10277778059244155, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.017015037685632707, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.017015037685632707, "step": 15 }, { "calibration/aurc": 0.43721687510369617, "calibration/batch_distribution_entropy": 0.40072087939511974, "calibration/buffer_distribution_entropy": 0.316298540027451, "calibration/confidence_entropy": 0.3090699022802787, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.03133159268929504, "calibration/coverage@5%": 0.0, "calibration/ece": 0.3651260156369419, "calibration/mean_confidence": 0.8837088543583007, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666652, "completions/max_length": 3783.2, "completions/max_terminated_length": 3783.2, "completions/mean_length": 484.31285400390624, "completions/mean_terminated_length": 489.445556640625, "completions/min_length": 0.0, "completions/min_terminated_length": 86.4, "epoch": 0.04799940000749991, "grad_norm": 0.0007687319302931428, "learning_rate": 2.380952380952381e-06, "loss": -0.0081, "num_tokens": 34747063.0, "reward": 0.8955905079841614, "reward_std": 0.33118437230587006, "rewards/accgated_coverage_0": 0.19059269838035106, "rewards/accgated_coverage_1": 0.19059269838035106, "rewards/accgated_coverage_10": 0.19059269838035106, "rewards/accgated_coverage_15": 0.19059269838035106, "rewards/accgated_coverage_20": 0.19059269838035106, "rewards/accgated_coverage_25": 0.19059269838035106, "rewards/accgated_coverage_5": 0.19059269838035106, "rewards/accuracy_reward": 0.4530381917953491, "rewards/brier_reward": 0.5732760310173035, "rewards/confidence_uniqueness_reward": 0.5944553971290588, "rewards/format_reward": 0.9848090171813965, "rewards/frontier_aurc_reward": 0.18578348318114876, "rewards/frontier_ece_reward": 0.18266455382108687, "rewards/frontier_entropy_batch_reward": -0.9410987496376038, "signal/accgated_coverage_0/centered_abs_mean": 0.11304162042215467, "signal/accgated_coverage_0/group_std_mean": 0.1411184100434184, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.011304162652231753, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.011304162652231753, "signal/accgated_coverage_1/centered_abs_mean": 0.11304162042215467, "signal/accgated_coverage_1/group_std_mean": 0.1411184100434184, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.011304162652231753, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.011304162652231753, "signal/accgated_coverage_10/centered_abs_mean": 0.11304162042215467, "signal/accgated_coverage_10/group_std_mean": 0.1411184100434184, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.011304162652231753, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.011304162652231753, "signal/accgated_coverage_15/centered_abs_mean": 0.11304162042215467, "signal/accgated_coverage_15/group_std_mean": 0.1411184100434184, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.011304162652231753, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.011304162652231753, "signal/accgated_coverage_20/centered_abs_mean": 0.11304162042215467, "signal/accgated_coverage_20/group_std_mean": 0.1411184100434184, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.011304162652231753, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.011304162652231753, "signal/accgated_coverage_25/centered_abs_mean": 0.11304162042215467, "signal/accgated_coverage_25/group_std_mean": 0.1411184100434184, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.011304162652231753, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.011304162652231753, "signal/accgated_coverage_5/centered_abs_mean": 0.11304162042215467, "signal/accgated_coverage_5/group_std_mean": 0.1411184100434184, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.011304162652231753, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.011304162652231753, "signal/accuracy_reward/centered_abs_mean": 0.28528103828430174, "signal/accuracy_reward/group_std_mean": 0.35118488073348997, "signal/accuracy_reward/group_zero_std_frac": 0.10000000149011612, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.14264051914215087, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.14264051914215087, "signal/advantage_abs_mean": 0.26579251885414124, "signal/advantage_pre_scale_abs_mean": 0.26579251885414124, "signal/advantage_pre_scale_std": 0.3466378539800644, "signal/advantage_std": 0.3466378539800644, "signal/brier_reward/centered_abs_mean": 0.24700996279716492, "signal/brier_reward/group_std_mean": 0.30329431891441344, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.024700997024774553, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.024700997024774553, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.18052109479904174, "signal/confidence_uniqueness_reward/group_std_mean": 0.21723917722702027, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.018052110448479652, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.018052110448479652, "signal/format_reward/centered_abs_mean": 0.02763129323720932, "signal/format_reward/group_std_mean": 0.06006475985050201, "signal/format_reward/group_zero_std_frac": 0.725, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01381564661860466, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01381564661860466, "signal/frontier_aurc_reward/centered_abs_mean": 0.1107280052267015, "signal/frontier_aurc_reward/group_std_mean": 0.13636438250541688, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0013841001396940555, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0013841001396940555, "signal/frontier_ece_reward/centered_abs_mean": 0.19009114503860475, "signal/frontier_ece_reward/group_std_mean": 0.22972893118858337, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0190091148018837, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0190091148018837, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.1023610308766365, "signal/frontier_entropy_batch_reward/group_std_mean": 0.19736399948596955, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.24444444477558136, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.010236102901399136, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.010236102901399136, "step": 20 }, { "calibration/aurc": 0.3437265656341763, "calibration/batch_distribution_entropy": 0.5656963876081482, "calibration/buffer_distribution_entropy": 0.36739076478574584, "calibration/confidence_entropy": 0.38956441308086304, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.007387862796833773, "calibration/coverage@15%": 0.0079155672823219, "calibration/coverage@20%": 0.1128265344182127, "calibration/coverage@25%": 0.21094365165978254, "calibration/coverage@30%": 0.5211917227624034, "calibration/coverage@5%": 0.0, "calibration/ece": 0.23278605898703247, "calibration/mean_confidence": 0.837022899828915, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010329861111111116, "completions/max_length": 3925.4, "completions/max_terminated_length": 3925.4, "completions/mean_length": 528.5024353027344, "completions/mean_terminated_length": 534.0195007324219, "completions/min_length": 0.0, "completions/min_terminated_length": 105.0, "epoch": 0.05999925000937488, "grad_norm": 0.0007447434472851455, "learning_rate": 2.9761904761904763e-06, "loss": -0.0059, "num_tokens": 43959859.0, "reward": 0.8110012769699096, "reward_std": 0.20988352298736573, "rewards/accgated_coverage_0": -0.0003714228863827884, "rewards/accgated_coverage_1": -0.0003714228863827884, "rewards/accgated_coverage_10": -0.0003714228863827884, "rewards/accgated_coverage_15": -0.0003714228863827884, "rewards/accgated_coverage_20": -0.0003714228863827884, "rewards/accgated_coverage_25": -0.0003714228863827884, "rewards/accgated_coverage_5": -0.0003714228863827884, "rewards/accuracy_reward": 0.5520833313465119, "rewards/brier_reward": 0.6689429998397827, "rewards/confidence_uniqueness_reward": 0.6822849154472351, "rewards/format_reward": 0.9855902791023254, "rewards/frontier_aurc_reward": -0.003991629648953676, "rewards/frontier_ece_reward": 0.013007813505828381, "rewards/frontier_entropy_batch_reward": -0.9394919872283936, "signal/accgated_coverage_0/centered_abs_mean": 0.012830922938883304, "signal/accgated_coverage_0/group_std_mean": 0.019968029484152792, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.0012830922496505082, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.0012830922496505082, "signal/accgated_coverage_1/centered_abs_mean": 0.012830922938883304, "signal/accgated_coverage_1/group_std_mean": 0.019968029484152792, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.0012830922496505082, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.0012830922496505082, "signal/accgated_coverage_10/centered_abs_mean": 0.012830922938883304, "signal/accgated_coverage_10/group_std_mean": 0.019968029484152792, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.0012830922496505082, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.0012830922496505082, "signal/accgated_coverage_15/centered_abs_mean": 0.012830922938883304, "signal/accgated_coverage_15/group_std_mean": 0.019968029484152792, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.0012830922496505082, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.0012830922496505082, "signal/accgated_coverage_20/centered_abs_mean": 0.012830922938883304, "signal/accgated_coverage_20/group_std_mean": 0.019968029484152792, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0012830922496505082, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0012830922496505082, "signal/accgated_coverage_25/centered_abs_mean": 0.012830922938883304, "signal/accgated_coverage_25/group_std_mean": 0.019968029484152792, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.0012830922496505082, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.0012830922496505082, "signal/accgated_coverage_5/centered_abs_mean": 0.012830922938883304, "signal/accgated_coverage_5/group_std_mean": 0.019968029484152792, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.0012830922496505082, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.0012830922496505082, "signal/accuracy_reward/centered_abs_mean": 0.27000868022441865, "signal/accuracy_reward/group_std_mean": 0.3324960470199585, "signal/accuracy_reward/group_zero_std_frac": 0.13888888955116271, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.13500434011220933, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.13500434011220933, "signal/advantage_abs_mean": 0.16560438871383668, "signal/advantage_pre_scale_abs_mean": 0.16560438871383668, "signal/advantage_pre_scale_std": 0.2258564442396164, "signal/advantage_std": 0.2258564442396164, "signal/brier_reward/centered_abs_mean": 0.20755743682384492, "signal/brier_reward/group_std_mean": 0.25747441351413725, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020755743607878685, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.020755743607878685, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.12232207953929901, "signal/confidence_uniqueness_reward/group_std_mean": 0.15422678291797637, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.012232208624482155, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.012232208624482155, "signal/format_reward/centered_abs_mean": 0.02601996473968029, "signal/format_reward/group_std_mean": 0.053718936443328855, "signal/format_reward/group_zero_std_frac": 0.7638888835906983, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.013009982369840144, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.013009982369840144, "signal/frontier_aurc_reward/centered_abs_mean": 0.002716710604727268, "signal/frontier_aurc_reward/group_std_mean": 0.004093794990330935, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.3958881977014245e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.3958881977014245e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.1095162957906723, "signal/frontier_ece_reward/group_std_mean": 0.1322493463754654, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.010951629653573037, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.010951629653573037, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.10384236574172974, "signal/frontier_entropy_batch_reward/group_std_mean": 0.20167314410209655, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.28611111342906953, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.010384235717356204, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.010384235717356204, "step": 25 }, { "calibration/aurc": 0.29093421091370353, "calibration/batch_distribution_entropy": 0.6750995975665728, "calibration/buffer_distribution_entropy": 0.45378212856528916, "calibration/confidence_entropy": 0.4977573778236459, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.05318784766795036, "calibration/coverage@25%": 0.21265174577746065, "calibration/coverage@30%": 0.7557201183853943, "calibration/coverage@5%": 0.0, "calibration/ece": 0.12327898021567016, "calibration/mean_confidence": 0.7645972398248402, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016840277777777767, "completions/max_length": 3997.6, "completions/max_terminated_length": 3997.6, "completions/mean_length": 594.1150146484375, "completions/mean_terminated_length": 604.371875, "completions/min_length": 0.0, "completions/min_terminated_length": 121.6, "epoch": 0.07199910001124986, "grad_norm": 0.000512155529577285, "learning_rate": 3.5714285714285718e-06, "loss": -0.007, "num_tokens": 53913984.0, "reward": 0.8377140998840332, "reward_std": 0.18636699616909028, "rewards/accgated_coverage_0": -0.004444451769813895, "rewards/accgated_coverage_1": -0.004444451769813895, "rewards/accgated_coverage_10": -0.004444451769813895, "rewards/accgated_coverage_15": -0.004444451769813895, "rewards/accgated_coverage_20": -0.004444451769813895, "rewards/accgated_coverage_25": -0.004444451769813895, "rewards/accgated_coverage_5": -0.004444451769813895, "rewards/accuracy_reward": 0.600000011920929, "rewards/brier_reward": 0.7193986296653747, "rewards/confidence_uniqueness_reward": 0.7030134916305542, "rewards/format_reward": 0.9794270873069764, "rewards/frontier_aurc_reward": -0.0032217550091445447, "rewards/frontier_ece_reward": 0.01657584626227617, "rewards/frontier_entropy_batch_reward": -0.927468454837799, "signal/accgated_coverage_0/centered_abs_mean": 0.022231166064739228, "signal/accgated_coverage_0/group_std_mean": 0.031239988654851912, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.0022231166949495673, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.0022231166949495673, "signal/accgated_coverage_1/centered_abs_mean": 0.022231166064739228, "signal/accgated_coverage_1/group_std_mean": 0.031239988654851912, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.0022231166949495673, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.0022231166949495673, "signal/accgated_coverage_10/centered_abs_mean": 0.022231166064739228, "signal/accgated_coverage_10/group_std_mean": 0.031239988654851912, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.0022231166949495673, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.0022231166949495673, "signal/accgated_coverage_15/centered_abs_mean": 0.022231166064739228, "signal/accgated_coverage_15/group_std_mean": 0.031239988654851912, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.0022231166949495673, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.0022231166949495673, "signal/accgated_coverage_20/centered_abs_mean": 0.022231166064739228, "signal/accgated_coverage_20/group_std_mean": 0.031239988654851912, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0022231166949495673, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0022231166949495673, "signal/accgated_coverage_25/centered_abs_mean": 0.022231166064739228, "signal/accgated_coverage_25/group_std_mean": 0.031239988654851912, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.0022231166949495673, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.0022231166949495673, "signal/accgated_coverage_5/centered_abs_mean": 0.022231166064739228, "signal/accgated_coverage_5/group_std_mean": 0.031239988654851912, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.0022231166949495673, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.0022231166949495673, "signal/accuracy_reward/centered_abs_mean": 0.23517795503139496, "signal/accuracy_reward/group_std_mean": 0.29286627769470214, "signal/accuracy_reward/group_zero_std_frac": 0.23055555820465087, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.11758897751569748, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.11758897751569748, "signal/advantage_abs_mean": 0.14355775713920593, "signal/advantage_pre_scale_abs_mean": 0.14355775713920593, "signal/advantage_pre_scale_std": 0.20758473575115205, "signal/advantage_std": 0.20758473575115205, "signal/brier_reward/centered_abs_mean": 0.16719190776348114, "signal/brier_reward/group_std_mean": 0.21108767986297608, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016719191148877145, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.016719191148877145, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.1300051212310791, "signal/confidence_uniqueness_reward/group_std_mean": 0.16288177371025087, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.01300051212310791, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.01300051212310791, "signal/format_reward/centered_abs_mean": 0.03359917588531971, "signal/format_reward/group_std_mean": 0.06414167955517769, "signal/format_reward/group_zero_std_frac": 0.7333333373069764, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.016799587942659854, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.016799587942659854, "signal/frontier_aurc_reward/centered_abs_mean": 0.0017353732837364078, "signal/frontier_aurc_reward/group_std_mean": 0.0027700068429112436, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.1692166410502977e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.1692166410502977e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.06365966349840164, "signal/frontier_ece_reward/group_std_mean": 0.08195510059595108, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006365966238081455, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006365966238081455, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.12449786216020584, "signal/frontier_entropy_batch_reward/group_std_mean": 0.23548357784748078, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.21388889253139495, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.012449786253273486, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.012449786253273486, "step": 30 }, { "calibration/aurc": 0.24915260747354795, "calibration/batch_distribution_entropy": 0.6926192490991758, "calibration/buffer_distribution_entropy": 0.5422166465452627, "calibration/confidence_entropy": 0.5485382320909491, "calibration/coverage@0%": 0.012073490813648294, "calibration/coverage@1%": 0.012073490813648294, "calibration/coverage@10%": 0.08183765599642892, "calibration/coverage@15%": 0.09896438274551941, "calibration/coverage@20%": 0.3158131348849086, "calibration/coverage@25%": 0.5002561456375967, "calibration/coverage@30%": 0.8, "calibration/coverage@5%": 0.015223097112860892, "calibration/ece": 0.07716275614743286, "calibration/mean_confidence": 0.7135749575163094, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.020225694444444463, "completions/max_length": 4023.0, "completions/max_terminated_length": 4023.0, "completions/mean_length": 624.6056518554688, "completions/mean_terminated_length": 637.64052734375, "completions/min_length": 0.0, "completions/min_terminated_length": 163.8, "epoch": 0.08399895001312484, "grad_norm": 0.0005128980264998972, "learning_rate": 4.166666666666667e-06, "loss": -0.0118, "num_tokens": 64186881.0, "reward": 0.8555383443832397, "reward_std": 0.17517488598823547, "rewards/accgated_coverage_0": -0.01074592862278223, "rewards/accgated_coverage_1": -0.01074592862278223, "rewards/accgated_coverage_10": -0.01074592862278223, "rewards/accgated_coverage_15": -0.01074592862278223, "rewards/accgated_coverage_20": -0.01074592862278223, "rewards/accgated_coverage_25": -0.01074592862278223, "rewards/accgated_coverage_5": -0.01074592862278223, "rewards/accuracy_reward": 0.6256076455116272, "rewards/brier_reward": 0.744514000415802, "rewards/confidence_uniqueness_reward": 0.7291248798370361, "rewards/format_reward": 0.9771701335906983, "rewards/frontier_aurc_reward": -0.0027033671736717223, "rewards/frontier_ece_reward": 0.009526663832366467, "rewards/frontier_entropy_batch_reward": -0.8661114573478699, "signal/accgated_coverage_0/centered_abs_mean": 0.03372676484286785, "signal/accgated_coverage_0/group_std_mean": 0.044625566154718396, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.0033726765774190425, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.0033726765774190425, "signal/accgated_coverage_1/centered_abs_mean": 0.03372676484286785, "signal/accgated_coverage_1/group_std_mean": 0.044625566154718396, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.0033726765774190425, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.0033726765774190425, "signal/accgated_coverage_10/centered_abs_mean": 0.03372676484286785, "signal/accgated_coverage_10/group_std_mean": 0.044625566154718396, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.0033726765774190425, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.0033726765774190425, "signal/accgated_coverage_15/centered_abs_mean": 0.03372676484286785, "signal/accgated_coverage_15/group_std_mean": 0.044625566154718396, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.0033726765774190425, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.0033726765774190425, "signal/accgated_coverage_20/centered_abs_mean": 0.03372676484286785, "signal/accgated_coverage_20/group_std_mean": 0.044625566154718396, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0033726765774190425, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0033726765774190425, "signal/accgated_coverage_25/centered_abs_mean": 0.03372676484286785, "signal/accgated_coverage_25/group_std_mean": 0.044625566154718396, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.0033726765774190425, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.0033726765774190425, "signal/accgated_coverage_5/centered_abs_mean": 0.03372676484286785, "signal/accgated_coverage_5/group_std_mean": 0.044625566154718396, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.0033726765774190425, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.0033726765774190425, "signal/accuracy_reward/centered_abs_mean": 0.21490342915058136, "signal/accuracy_reward/group_std_mean": 0.2722406297922134, "signal/accuracy_reward/group_zero_std_frac": 0.26666666865348815, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.10745171457529068, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.10745171457529068, "signal/advantage_abs_mean": 0.132158000767231, "signal/advantage_pre_scale_abs_mean": 0.132158000767231, "signal/advantage_pre_scale_std": 0.1931176006793976, "signal/advantage_std": 0.1931176006793976, "signal/brier_reward/centered_abs_mean": 0.1418018341064453, "signal/brier_reward/group_std_mean": 0.18220576047897338, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014180183783173561, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014180183783173561, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.12838705331087114, "signal/confidence_uniqueness_reward/group_std_mean": 0.15706448554992675, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.012838705442845821, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.012838705442845821, "signal/format_reward/centered_abs_mean": 0.03528103269636631, "signal/format_reward/group_std_mean": 0.06216970533132553, "signal/format_reward/group_zero_std_frac": 0.7583333373069763, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.017640516348183154, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.017640516348183154, "signal/frontier_aurc_reward/centered_abs_mean": 0.001337575796060264, "signal/frontier_aurc_reward/group_std_mean": 0.002151899482123554, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.671969730523415e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.671969730523415e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.04378785789012909, "signal/frontier_ece_reward/group_std_mean": 0.06361446380615235, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004378785844892263, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004378785844892263, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.21290639340877532, "signal/frontier_entropy_batch_reward/group_std_mean": 0.33500158190727236, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.09166666641831397, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.021290638856589793, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.021290638856589793, "step": 35 }, { "calibration/aurc": 0.27114436702428224, "calibration/batch_distribution_entropy": 0.8070451575586078, "calibration/buffer_distribution_entropy": 0.6061007181434863, "calibration/confidence_entropy": 0.5118545178162875, "calibration/coverage@0%": 0.0037735849056603774, "calibration/coverage@1%": 0.0037735849056603774, "calibration/coverage@10%": 0.005929919137466307, "calibration/coverage@15%": 0.057831138761233844, "calibration/coverage@20%": 0.2453923415759484, "calibration/coverage@25%": 0.48747760107551913, "calibration/coverage@30%": 0.6787977944126284, "calibration/coverage@5%": 0.0037735849056603774, "calibration/ece": 0.10934041252550494, "calibration/mean_confidence": 0.7113611514039639, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.018750000000000024, "completions/max_length": 3989.2, "completions/max_terminated_length": 3989.2, "completions/mean_length": 631.6644165039063, "completions/mean_terminated_length": 643.7576416015625, "completions/min_length": 0.0, "completions/min_terminated_length": 168.2, "epoch": 0.09599880001499982, "grad_norm": 0.0004933858290314674, "learning_rate": 4.761904761904762e-06, "loss": -0.0147, "num_tokens": 74583175.0, "reward": 0.9134387254714966, "reward_std": 0.17632506489753724, "rewards/accgated_coverage_0": -0.008661169628612696, "rewards/accgated_coverage_1": -0.008661169628612696, "rewards/accgated_coverage_10": -0.008661169628612696, "rewards/accgated_coverage_15": -0.008661169628612696, "rewards/accgated_coverage_20": -0.008661169628612696, "rewards/accgated_coverage_25": -0.008661169628612696, "rewards/accgated_coverage_5": -0.008661169628612696, "rewards/accuracy_reward": 0.6469618082046509, "rewards/brier_reward": 0.7537546157836914, "rewards/confidence_uniqueness_reward": 0.8956602692604065, "rewards/format_reward": 0.9789930582046509, "rewards/frontier_aurc_reward": -0.00252625304274261, "rewards/frontier_ece_reward": 0.004404827463440597, "rewards/frontier_entropy_batch_reward": -0.5882636129856109, "signal/accgated_coverage_0/centered_abs_mean": 0.04707988202571869, "signal/accgated_coverage_0/group_std_mean": 0.06582793518900872, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.004707988444715738, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.004707988444715738, "signal/accgated_coverage_1/centered_abs_mean": 0.04707988202571869, "signal/accgated_coverage_1/group_std_mean": 0.06582793518900872, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.004707988444715738, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.004707988444715738, "signal/accgated_coverage_10/centered_abs_mean": 0.04707988202571869, "signal/accgated_coverage_10/group_std_mean": 0.06582793518900872, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.004707988444715738, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.004707988444715738, "signal/accgated_coverage_15/centered_abs_mean": 0.04707988202571869, "signal/accgated_coverage_15/group_std_mean": 0.06582793518900872, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.004707988444715738, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.004707988444715738, "signal/accgated_coverage_20/centered_abs_mean": 0.04707988202571869, "signal/accgated_coverage_20/group_std_mean": 0.06582793518900872, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.004707988444715738, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.004707988444715738, "signal/accgated_coverage_25/centered_abs_mean": 0.04707988202571869, "signal/accgated_coverage_25/group_std_mean": 0.06582793518900872, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.004707988444715738, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.004707988444715738, "signal/accgated_coverage_5/centered_abs_mean": 0.04707988202571869, "signal/accgated_coverage_5/group_std_mean": 0.06582793518900872, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.004707988444715738, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.004707988444715738, "signal/accuracy_reward/centered_abs_mean": 0.19857313632965087, "signal/accuracy_reward/group_std_mean": 0.26094743609428406, "signal/accuracy_reward/group_zero_std_frac": 0.26388889253139497, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09928656816482544, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09928656816482544, "signal/advantage_abs_mean": 0.13168415129184724, "signal/advantage_pre_scale_abs_mean": 0.13168415129184724, "signal/advantage_pre_scale_std": 0.1955942243337631, "signal/advantage_std": 0.1955942243337631, "signal/brier_reward/centered_abs_mean": 0.15957279205322267, "signal/brier_reward/group_std_mean": 0.2055341988801956, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01595727913081646, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01595727913081646, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.07235777825117111, "signal/confidence_uniqueness_reward/group_std_mean": 0.10315420925617218, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007235778030008078, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007235778030008078, "signal/format_reward/centered_abs_mean": 0.0353515625, "signal/format_reward/group_std_mean": 0.06308476254343987, "signal/format_reward/group_zero_std_frac": 0.7527777910232544, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01767578125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01767578125, "signal/frontier_aurc_reward/centered_abs_mean": 0.0019746162462979553, "signal/frontier_aurc_reward/group_std_mean": 0.0032937098294496536, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.4682703951839358e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.4682703951839358e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.05753466859459877, "signal/frontier_ece_reward/group_std_mean": 0.09539404213428497, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005753467138856649, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005753467138856649, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.4239261865615845, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4854666531085968, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.04239262193441391, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.04239262193441391, "step": 40 }, { "calibration/aurc": 0.22503516674594654, "calibration/batch_distribution_entropy": 0.936723476211095, "calibration/buffer_distribution_entropy": 0.6664189748711518, "calibration/confidence_entropy": 0.4939127257586667, "calibration/coverage@0%": 0.0005208333333333333, "calibration/coverage@1%": 0.0005208333333333333, "calibration/coverage@10%": 0.029163725780200794, "calibration/coverage@15%": 0.1030003198965398, "calibration/coverage@20%": 0.431992266240601, "calibration/coverage@25%": 0.8520846136304583, "calibration/coverage@30%": 0.9671087533156498, "calibration/coverage@5%": 0.0005208333333333333, "calibration/ece": 0.16665695734001001, "calibration/mean_confidence": 0.6389931033855959, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017361111111111115, "completions/max_length": 3583.8, "completions/max_terminated_length": 3583.8, "completions/mean_length": 662.6905395507813, "completions/mean_terminated_length": 674.4956298828125, "completions/min_length": 0.0, "completions/min_terminated_length": 186.6, "epoch": 0.1079986500168748, "grad_norm": 0.0004158303199801594, "learning_rate": 4.909638554216868e-06, "loss": -0.016, "num_tokens": 85352634.0, "reward": 0.9387454271316529, "reward_std": 0.17266902327537537, "rewards/accgated_coverage_0": -0.01770685804076493, "rewards/accgated_coverage_1": -0.01770685804076493, "rewards/accgated_coverage_10": -0.01770685804076493, "rewards/accgated_coverage_15": -0.01770685804076493, "rewards/accgated_coverage_20": -0.01770685804076493, "rewards/accgated_coverage_25": -0.01770685804076493, "rewards/accgated_coverage_5": -0.01770685804076493, "rewards/accuracy_reward": 0.647569453716278, "rewards/brier_reward": 0.7374800324440003, "rewards/confidence_uniqueness_reward": 0.9320099115371704, "rewards/format_reward": 0.9820312619209289, "rewards/frontier_aurc_reward": -0.002436440950259566, "rewards/frontier_ece_reward": -0.0011549136601388455, "rewards/frontier_entropy_batch_reward": -0.30463194847106934, "signal/accgated_coverage_0/centered_abs_mean": 0.08323955237865448, "signal/accgated_coverage_0/group_std_mean": 0.11306367218494415, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.008323955349624157, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.008323955349624157, "signal/accgated_coverage_1/centered_abs_mean": 0.08323955237865448, "signal/accgated_coverage_1/group_std_mean": 0.11306367218494415, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.008323955349624157, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.008323955349624157, "signal/accgated_coverage_10/centered_abs_mean": 0.08323955237865448, "signal/accgated_coverage_10/group_std_mean": 0.11306367218494415, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.008323955349624157, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.008323955349624157, "signal/accgated_coverage_15/centered_abs_mean": 0.08323955237865448, "signal/accgated_coverage_15/group_std_mean": 0.11306367218494415, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.008323955349624157, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.008323955349624157, "signal/accgated_coverage_20/centered_abs_mean": 0.08323955237865448, "signal/accgated_coverage_20/group_std_mean": 0.11306367218494415, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.008323955349624157, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.008323955349624157, "signal/accgated_coverage_25/centered_abs_mean": 0.08323955237865448, "signal/accgated_coverage_25/group_std_mean": 0.11306367218494415, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.008323955349624157, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.008323955349624157, "signal/accgated_coverage_5/centered_abs_mean": 0.08323955237865448, "signal/accgated_coverage_5/group_std_mean": 0.11306367218494415, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.008323955349624157, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.008323955349624157, "signal/accuracy_reward/centered_abs_mean": 0.1957465261220932, "signal/accuracy_reward/group_std_mean": 0.2632771462202072, "signal/accuracy_reward/group_zero_std_frac": 0.23888889253139495, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0978732630610466, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0978732630610466, "signal/advantage_abs_mean": 0.12990787327289582, "signal/advantage_pre_scale_abs_mean": 0.12990787327289582, "signal/advantage_pre_scale_std": 0.1868172764778137, "signal/advantage_std": 0.1868172764778137, "signal/brier_reward/centered_abs_mean": 0.20081567168235778, "signal/brier_reward/group_std_mean": 0.25101232826709746, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020081567019224165, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.020081567019224165, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04166658595204353, "signal/confidence_uniqueness_reward/group_std_mean": 0.06948793828487396, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0041666587349027395, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0041666587349027395, "signal/format_reward/centered_abs_mean": 0.03059353269636631, "signal/format_reward/group_std_mean": 0.05648069083690643, "signal/format_reward/group_zero_std_frac": 0.7722222328186035, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.015296766348183156, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.015296766348183156, "signal/frontier_aurc_reward/centered_abs_mean": 0.002271978510543704, "signal/frontier_aurc_reward/group_std_mean": 0.003890362149104476, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.8399731672834604e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.8399731672834604e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.05871346145868302, "signal/frontier_ece_reward/group_std_mean": 0.0988737851381302, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0058713463135063645, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0058713463135063645, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.36136391162872317, "signal/frontier_entropy_batch_reward/group_std_mean": 0.43039796948432923, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.036136391758918765, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.036136391758918765, "step": 45 }, { "calibration/aurc": 0.41323223140525994, "calibration/batch_distribution_entropy": 0.9674160680358039, "calibration/buffer_distribution_entropy": 0.7243289379368653, "calibration/confidence_entropy": 0.4737487432824161, "calibration/coverage@0%": 0.005841469816272966, "calibration/coverage@1%": 0.005841469816272966, "calibration/coverage@10%": 0.005841469816272966, "calibration/coverage@15%": 0.011226555171169642, "calibration/coverage@20%": 0.03386231438051376, "calibration/coverage@25%": 0.04409075280684254, "calibration/coverage@30%": 0.22428921542039948, "calibration/coverage@5%": 0.005841469816272966, "calibration/ece": 0.2079400346436131, "calibration/mean_confidence": 0.6007060708093117, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015017361111111094, "completions/max_length": 3660.8, "completions/max_terminated_length": 3660.8, "completions/mean_length": 669.3516479492188, "completions/mean_terminated_length": 679.5452392578125, "completions/min_length": 0.0, "completions/min_terminated_length": 165.8, "epoch": 0.11999850001874976, "grad_norm": 0.0004630894400179386, "learning_rate": 4.759036144578314e-06, "loss": -0.0142, "num_tokens": 96161165.0, "reward": 0.9375450730323791, "reward_std": 0.16306346654891968, "rewards/accgated_coverage_0": -0.008981034625321627, "rewards/accgated_coverage_1": -0.008981034625321627, "rewards/accgated_coverage_10": -0.008981034625321627, "rewards/accgated_coverage_15": -0.008981034625321627, "rewards/accgated_coverage_20": -0.008981034625321627, "rewards/accgated_coverage_25": -0.008981034625321627, "rewards/accgated_coverage_5": -0.008981034625321627, "rewards/accuracy_reward": 0.6288194537162781, "rewards/brier_reward": 0.7329934477806092, "rewards/confidence_uniqueness_reward": 0.9333699584007263, "rewards/format_reward": 0.9846354246139526, "rewards/frontier_aurc_reward": -0.002544494904577732, "rewards/frontier_ece_reward": 0.007357514463365078, "rewards/frontier_entropy_batch_reward": -0.30235905647277833, "signal/accgated_coverage_0/centered_abs_mean": 0.08796186000108719, "signal/accgated_coverage_0/group_std_mean": 0.119529028236866, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.00879618600010872, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.00879618600010872, "signal/accgated_coverage_1/centered_abs_mean": 0.08796186000108719, "signal/accgated_coverage_1/group_std_mean": 0.119529028236866, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.00879618600010872, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.00879618600010872, "signal/accgated_coverage_10/centered_abs_mean": 0.08796186000108719, "signal/accgated_coverage_10/group_std_mean": 0.119529028236866, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.00879618600010872, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.00879618600010872, "signal/accgated_coverage_15/centered_abs_mean": 0.08796186000108719, "signal/accgated_coverage_15/group_std_mean": 0.119529028236866, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.00879618600010872, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.00879618600010872, "signal/accgated_coverage_20/centered_abs_mean": 0.08796186000108719, "signal/accgated_coverage_20/group_std_mean": 0.119529028236866, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.00879618600010872, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.00879618600010872, "signal/accgated_coverage_25/centered_abs_mean": 0.08796186000108719, "signal/accgated_coverage_25/group_std_mean": 0.119529028236866, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.00879618600010872, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.00879618600010872, "signal/accgated_coverage_5/centered_abs_mean": 0.08796186000108719, "signal/accgated_coverage_5/group_std_mean": 0.119529028236866, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.00879618600010872, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.00879618600010872, "signal/accuracy_reward/centered_abs_mean": 0.18984375, "signal/accuracy_reward/group_std_mean": 0.2495385080575943, "signal/accuracy_reward/group_zero_std_frac": 0.2944444417953491, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.094921875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.094921875, "signal/advantage_abs_mean": 0.12400750964879989, "signal/advantage_pre_scale_abs_mean": 0.12400750964879989, "signal/advantage_pre_scale_std": 0.17833105027675628, "signal/advantage_std": 0.17833105027675628, "signal/brier_reward/centered_abs_mean": 0.21165235340595245, "signal/brier_reward/group_std_mean": 0.2621103286743164, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021165235340595244, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.021165235340595244, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03903521485626697, "signal/confidence_uniqueness_reward/group_std_mean": 0.06291390731930732, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0039035214576870203, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0039035214576870203, "signal/format_reward/centered_abs_mean": 0.025244140625, "signal/format_reward/group_std_mean": 0.046590489149093625, "signal/format_reward/group_zero_std_frac": 0.8111111164093018, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0126220703125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0126220703125, "signal/frontier_aurc_reward/centered_abs_mean": 0.0024692637380212545, "signal/frontier_aurc_reward/group_std_mean": 0.003953724354505539, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.086579599766992e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.086579599766992e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.05328697934746742, "signal/frontier_ece_reward/group_std_mean": 0.08139285743236542, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005328698176890611, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005328698176890611, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3716325283050537, "signal/frontier_entropy_batch_reward/group_std_mean": 0.43904575109481814, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.037163253873586655, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.037163253873586655, "step": 50 }, { "epoch": 0.11999850001874976, "eval_calibration/aurc": 0.24055350759374375, "eval_calibration/batch_distribution_entropy": 0.8994038710520571, "eval_calibration/buffer_distribution_entropy": 0.7535534438871466, "eval_calibration/confidence_entropy": 0.46967083146270466, "eval_calibration/coverage@0%": 0.09442204301075269, "eval_calibration/coverage@1%": 0.09442204301075269, "eval_calibration/coverage@10%": 0.09442204301075269, "eval_calibration/coverage@15%": 0.302755376344086, "eval_calibration/coverage@20%": 0.3602150537634408, "eval_calibration/coverage@25%": 0.7044690860215054, "eval_calibration/coverage@30%": 0.9097782258064516, "eval_calibration/coverage@5%": 0.09442204301075269, "eval_calibration/ece": 0.24757249249593236, "eval_calibration/mean_confidence": 0.6110164873464803, "eval_completions/clipped_ratio": 0.016493055555555563, "eval_completions/max_length": 2924.0, "eval_completions/max_terminated_length": 2924.0, "eval_completions/mean_length": 646.5880432128906, "eval_completions/mean_terminated_length": 657.3784790039062, "eval_completions/min_length": 48.5, "eval_completions/min_terminated_length": 235.16666666666666, "eval_loss": 0.0, "eval_num_tokens": 96161165.0, "eval_reward": 0.8702710568904877, "eval_reward_std": 0.23088541626930237, "eval_rewards/accgated_coverage_0": -0.007695769192650914, "eval_rewards/accgated_coverage_1": -0.007695769192650914, "eval_rewards/accgated_coverage_10": -0.007695769192650914, "eval_rewards/accgated_coverage_15": -0.007695769192650914, "eval_rewards/accgated_coverage_20": -0.007695769192650914, "eval_rewards/accgated_coverage_25": -0.007695769192650914, "eval_rewards/accgated_coverage_5": -0.007695769192650914, "eval_rewards/accuracy_reward": 0.6388888955116272, "eval_rewards/brier_reward": 0.7428200940291086, "eval_rewards/confidence_uniqueness_reward": 0.8793094853560129, "eval_rewards/format_reward": 0.9826388955116272, "eval_rewards/frontier_aurc_reward": -0.002335130760911852, "eval_rewards/frontier_ece_reward": 0.009743184200488031, "eval_rewards/frontier_entropy_batch_reward": -0.9826388955116272, "eval_runtime": 196.7222, "eval_samples_per_second": 5.083, "eval_signal/accgated_coverage_0/centered_abs_mean": 0.10488361865282059, "eval_signal/accgated_coverage_0/group_std_mean": 0.15331803013881048, "eval_signal/accgated_coverage_0/group_zero_std_frac": 0.0, "eval_signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.010488361585885286, "eval_signal/accgated_coverage_0/weight": 0.10000000149011612, "eval_signal/accgated_coverage_0/weighted_centered_abs_mean": 0.010488361585885286, "eval_signal/accgated_coverage_1/centered_abs_mean": 0.10488361865282059, "eval_signal/accgated_coverage_1/group_std_mean": 0.15331803013881048, "eval_signal/accgated_coverage_1/group_zero_std_frac": 0.0, "eval_signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.010488361585885286, "eval_signal/accgated_coverage_1/weight": 0.10000000149011612, "eval_signal/accgated_coverage_1/weighted_centered_abs_mean": 0.010488361585885286, "eval_signal/accgated_coverage_10/centered_abs_mean": 0.10488361865282059, "eval_signal/accgated_coverage_10/group_std_mean": 0.15331803013881048, "eval_signal/accgated_coverage_10/group_zero_std_frac": 0.0, "eval_signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.010488361585885286, "eval_signal/accgated_coverage_10/weight": 0.10000000149011612, "eval_signal/accgated_coverage_10/weighted_centered_abs_mean": 0.010488361585885286, "eval_signal/accgated_coverage_15/centered_abs_mean": 0.10488361865282059, "eval_signal/accgated_coverage_15/group_std_mean": 0.15331803013881048, "eval_signal/accgated_coverage_15/group_zero_std_frac": 0.0, "eval_signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.010488361585885286, "eval_signal/accgated_coverage_15/weight": 0.10000000149011612, "eval_signal/accgated_coverage_15/weighted_centered_abs_mean": 0.010488361585885286, "eval_signal/accgated_coverage_20/centered_abs_mean": 0.10488361865282059, "eval_signal/accgated_coverage_20/group_std_mean": 0.15331803013881048, "eval_signal/accgated_coverage_20/group_zero_std_frac": 0.0, "eval_signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.010488361585885286, "eval_signal/accgated_coverage_20/weight": 0.10000000149011612, "eval_signal/accgated_coverage_20/weighted_centered_abs_mean": 0.010488361585885286, "eval_signal/accgated_coverage_25/centered_abs_mean": 0.10488361865282059, "eval_signal/accgated_coverage_25/group_std_mean": 0.15331803013881048, "eval_signal/accgated_coverage_25/group_zero_std_frac": 0.0, "eval_signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.010488361585885286, "eval_signal/accgated_coverage_25/weight": 0.10000000149011612, "eval_signal/accgated_coverage_25/weighted_centered_abs_mean": 0.010488361585885286, "eval_signal/accgated_coverage_5/centered_abs_mean": 0.10488361865282059, "eval_signal/accgated_coverage_5/group_std_mean": 0.15331803013881048, "eval_signal/accgated_coverage_5/group_zero_std_frac": 0.0, "eval_signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.010488361585885286, "eval_signal/accgated_coverage_5/weight": 0.10000000149011612, "eval_signal/accgated_coverage_5/weighted_centered_abs_mean": 0.010488361585885286, "eval_signal/accuracy_reward/centered_abs_mean": 0.4456380208333333, "eval_signal/accuracy_reward/group_std_mean": 0.47888515889644623, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.22281901041666666, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.22281901041666666, "eval_signal/advantage_abs_mean": 0.19127274056275687, "eval_signal/advantage_pre_scale_abs_mean": 0.19127274056275687, "eval_signal/advantage_pre_scale_std": 0.22979939977327982, "eval_signal/advantage_std": 0.22979939977327982, "eval_signal/brier_reward/centered_abs_mean": 0.2277601733803749, "eval_signal/brier_reward/group_std_mean": 0.28239578008651733, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.022776018207271893, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.022776018207271893, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.06234860916932424, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.10822617262601852, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006234860823800166, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006234860823800166, "eval_signal/format_reward/centered_abs_mean": 0.03320312515522043, "eval_signal/format_reward/group_std_mean": 0.08625163851926725, "eval_signal/format_reward/group_zero_std_frac": 0.5555555646618208, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.016601562577610213, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.016601562577610213, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.002441129821818322, "eval_signal/frontier_aurc_reward/group_std_mean": 0.004314790751474599, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.0514122651463065e-05, "eval_signal/frontier_aurc_reward/weight": 0.012500000186264515, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.0514122651463065e-05, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.046028090020020805, "eval_signal/frontier_ece_reward/group_std_mean": 0.06924534775316715, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0046028091649835306, "eval_signal/frontier_ece_reward/weight": 0.10000000149011612, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0046028091649835306, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.03320312515522043, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.08625163851926725, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.5555555646618208, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0033203125737297037, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0033203125737297037, "eval_steps_per_second": 0.03, "step": 50 }, { "calibration/aurc": 0.2413598864691334, "calibration/batch_distribution_entropy": 0.9695181837472722, "calibration/buffer_distribution_entropy": 0.7721430753136292, "calibration/confidence_entropy": 0.493656003511313, "calibration/coverage@0%": 0.01855932303109915, "calibration/coverage@1%": 0.01855932303109915, "calibration/coverage@10%": 0.1056984306426477, "calibration/coverage@15%": 0.2471195636637226, "calibration/coverage@20%": 0.4991462525626197, "calibration/coverage@25%": 0.6108610535718757, "calibration/coverage@30%": 0.6864563230590697, "calibration/coverage@5%": 0.05110525478962933, "calibration/ece": 0.17133809174350081, "calibration/mean_confidence": 0.5723697325482006, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.018576388888888906, "completions/max_length": 3474.6, "completions/max_terminated_length": 3474.6, "completions/mean_length": 683.1578247070313, "completions/mean_terminated_length": 696.117626953125, "completions/min_length": 0.0, "completions/min_terminated_length": 161.6, "epoch": 0.13199835002062474, "grad_norm": 0.0004027851391583681, "learning_rate": 4.60843373493976e-06, "loss": -0.0177, "num_tokens": 107111719.0, "reward": 0.9484212160110473, "reward_std": 0.1675104558467865, "rewards/accgated_coverage_0": -0.007159726228564978, "rewards/accgated_coverage_1": -0.007159726228564978, "rewards/accgated_coverage_10": -0.007159726228564978, "rewards/accgated_coverage_15": -0.007159726228564978, "rewards/accgated_coverage_20": -0.007159726228564978, "rewards/accgated_coverage_25": -0.007159726228564978, "rewards/accgated_coverage_5": -0.007159726228564978, "rewards/accuracy_reward": 0.6403645873069763, "rewards/brier_reward": 0.7422665119171142, "rewards/confidence_uniqueness_reward": 0.9326986908912659, "rewards/format_reward": 0.981250011920929, "rewards/frontier_aurc_reward": -0.002089855796657503, "rewards/frontier_ece_reward": 0.007551212143152952, "rewards/frontier_entropy_batch_reward": -0.2559980094432831, "signal/accgated_coverage_0/centered_abs_mean": 0.09629447013139725, "signal/accgated_coverage_0/group_std_mean": 0.12818139046430588, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.009629447385668755, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.009629447385668755, "signal/accgated_coverage_1/centered_abs_mean": 0.09629447013139725, "signal/accgated_coverage_1/group_std_mean": 0.12818139046430588, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.009629447385668755, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.009629447385668755, "signal/accgated_coverage_10/centered_abs_mean": 0.09629447013139725, "signal/accgated_coverage_10/group_std_mean": 0.12818139046430588, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.009629447385668755, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.009629447385668755, "signal/accgated_coverage_15/centered_abs_mean": 0.09629447013139725, "signal/accgated_coverage_15/group_std_mean": 0.12818139046430588, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.009629447385668755, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.009629447385668755, "signal/accgated_coverage_20/centered_abs_mean": 0.09629447013139725, "signal/accgated_coverage_20/group_std_mean": 0.12818139046430588, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.009629447385668755, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.009629447385668755, "signal/accgated_coverage_25/centered_abs_mean": 0.09629447013139725, "signal/accgated_coverage_25/group_std_mean": 0.12818139046430588, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.009629447385668755, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.009629447385668755, "signal/accgated_coverage_5/centered_abs_mean": 0.09629447013139725, "signal/accgated_coverage_5/group_std_mean": 0.12818139046430588, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.009629447385668755, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.009629447385668755, "signal/accuracy_reward/centered_abs_mean": 0.1913899749517441, "signal/accuracy_reward/group_std_mean": 0.25248887240886686, "signal/accuracy_reward/group_zero_std_frac": 0.2833333373069763, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09569498747587205, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09569498747587205, "signal/advantage_abs_mean": 0.12650371789932252, "signal/advantage_pre_scale_abs_mean": 0.12650371789932252, "signal/advantage_pre_scale_std": 0.1835579603910446, "signal/advantage_std": 0.1835579603910446, "signal/brier_reward/centered_abs_mean": 0.2032104343175888, "signal/brier_reward/group_std_mean": 0.25411616265773773, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020321043208241463, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.020321043208241463, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04227638766169548, "signal/confidence_uniqueness_reward/group_std_mean": 0.0690533883869648, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00422763884998858, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00422763884998858, "signal/format_reward/centered_abs_mean": 0.03132595531642437, "signal/format_reward/group_std_mean": 0.056155077368021014, "signal/format_reward/group_zero_std_frac": 0.7805555582046508, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.015662977658212184, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.015662977658212184, "signal/frontier_aurc_reward/centered_abs_mean": 0.0017812325153499843, "signal/frontier_aurc_reward/group_std_mean": 0.0028733307030051948, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.2265406369115225e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.2265406369115225e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.046052918583154676, "signal/frontier_ece_reward/group_std_mean": 0.06805866062641144, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004605291876941919, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004605291876941919, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3332373261451721, "signal/frontier_entropy_batch_reward/group_std_mean": 0.40649659633636476, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.033323732763528825, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033323732763528825, "step": 55 }, { "calibration/aurc": 0.3141651107938186, "calibration/batch_distribution_entropy": 0.9078290881449693, "calibration/buffer_distribution_entropy": 0.7981656224187011, "calibration/confidence_entropy": 0.48061793223665383, "calibration/coverage@0%": 0.0068981182316749405, "calibration/coverage@1%": 0.0068981182316749405, "calibration/coverage@10%": 0.009025777806143026, "calibration/coverage@15%": 0.2827577680892725, "calibration/coverage@20%": 0.40764372400036875, "calibration/coverage@25%": 0.4818494291715309, "calibration/coverage@30%": 0.6399625689519306, "calibration/coverage@5%": 0.0068981182316749405, "calibration/ece": 0.1617690875291686, "calibration/mean_confidence": 0.6608954671026755, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01605902777777779, "completions/max_length": 3923.2, "completions/max_terminated_length": 3923.2, "completions/mean_length": 697.619970703125, "completions/mean_terminated_length": 709.0864135742188, "completions/min_length": 0.0, "completions/min_terminated_length": 191.2, "epoch": 0.14399820002249972, "grad_norm": 0.0003930129169020802, "learning_rate": 4.457831325301205e-06, "loss": -0.0158, "num_tokens": 118244877.0, "reward": 0.9352314114570618, "reward_std": 0.16064883172512054, "rewards/accgated_coverage_0": 0.004846313409507275, "rewards/accgated_coverage_1": 0.004846313409507275, "rewards/accgated_coverage_10": 0.004846313409507275, "rewards/accgated_coverage_15": 0.004846313409507275, "rewards/accgated_coverage_20": 0.004846313409507275, "rewards/accgated_coverage_25": 0.004846313409507275, "rewards/accgated_coverage_5": 0.004846313409507275, "rewards/accuracy_reward": 0.6222222208976745, "rewards/brier_reward": 0.7681910872459412, "rewards/confidence_uniqueness_reward": 0.922529149055481, "rewards/format_reward": 0.983593761920929, "rewards/frontier_aurc_reward": -0.002249367954209447, "rewards/frontier_ece_reward": 0.0151396244764328, "rewards/frontier_entropy_batch_reward": -0.4162684798240662, "signal/accgated_coverage_0/centered_abs_mean": 0.06432908028364182, "signal/accgated_coverage_0/group_std_mean": 0.09131758958101273, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.006432908400893211, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.006432908400893211, "signal/accgated_coverage_1/centered_abs_mean": 0.06432908028364182, "signal/accgated_coverage_1/group_std_mean": 0.09131758958101273, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.006432908400893211, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.006432908400893211, "signal/accgated_coverage_10/centered_abs_mean": 0.06432908028364182, "signal/accgated_coverage_10/group_std_mean": 0.09131758958101273, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.006432908400893211, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.006432908400893211, "signal/accgated_coverage_15/centered_abs_mean": 0.06432908028364182, "signal/accgated_coverage_15/group_std_mean": 0.09131758958101273, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.006432908400893211, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.006432908400893211, "signal/accgated_coverage_20/centered_abs_mean": 0.06432908028364182, "signal/accgated_coverage_20/group_std_mean": 0.09131758958101273, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.006432908400893211, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.006432908400893211, "signal/accgated_coverage_25/centered_abs_mean": 0.06432908028364182, "signal/accgated_coverage_25/group_std_mean": 0.09131758958101273, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.006432908400893211, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.006432908400893211, "signal/accgated_coverage_5/centered_abs_mean": 0.06432908028364182, "signal/accgated_coverage_5/group_std_mean": 0.09131758958101273, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.006432908400893211, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.006432908400893211, "signal/accuracy_reward/centered_abs_mean": 0.19360894560813904, "signal/accuracy_reward/group_std_mean": 0.25349748134613037, "signal/accuracy_reward/group_zero_std_frac": 0.28611111342906953, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09680447280406952, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09680447280406952, "signal/advantage_abs_mean": 0.12054670006036758, "signal/advantage_pre_scale_abs_mean": 0.12054670006036758, "signal/advantage_pre_scale_std": 0.18156724274158478, "signal/advantage_std": 0.18156724274158478, "signal/brier_reward/centered_abs_mean": 0.17527975440025328, "signal/brier_reward/group_std_mean": 0.22410308420658112, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017527975887060166, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.017527975887060166, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.045045085996389386, "signal/confidence_uniqueness_reward/group_std_mean": 0.06884901076555253, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00450450861826539, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00450450861826539, "signal/format_reward/centered_abs_mean": 0.02762044295668602, "signal/format_reward/group_std_mean": 0.04820304960012436, "signal/format_reward/group_zero_std_frac": 0.8166666626930237, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01381022147834301, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01381022147834301, "signal/frontier_aurc_reward/centered_abs_mean": 0.0021717621013522146, "signal/frontier_aurc_reward/group_std_mean": 0.00348337315954268, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.71470271400176e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.71470271400176e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.03851405009627342, "signal/frontier_ece_reward/group_std_mean": 0.05781885012984276, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.003851405158638954, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.003851405158638954, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.38271217942237856, "signal/frontier_entropy_batch_reward/group_std_mean": 0.44837799072265627, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03827122002840042, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03827122002840042, "step": 60 }, { "calibration/aurc": 0.2013986441728523, "calibration/batch_distribution_entropy": 0.9596038273329303, "calibration/buffer_distribution_entropy": 0.8192018538249008, "calibration/confidence_entropy": 0.4998554429294299, "calibration/coverage@0%": 0.039043696775094235, "calibration/coverage@1%": 0.039043696775094235, "calibration/coverage@10%": 0.2842873097864024, "calibration/coverage@15%": 0.544249616082647, "calibration/coverage@20%": 0.6466941225743404, "calibration/coverage@25%": 0.709493229093955, "calibration/coverage@30%": 0.7496021220159151, "calibration/coverage@5%": 0.10911489599329889, "calibration/ece": 0.13580137123877775, "calibration/mean_confidence": 0.5785728899541247, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011718749999999977, "completions/max_length": 3612.8, "completions/max_terminated_length": 3612.8, "completions/mean_length": 711.3194580078125, "completions/mean_terminated_length": 719.77431640625, "completions/min_length": 0.0, "completions/min_terminated_length": 174.2, "epoch": 0.1559980500243747, "grad_norm": 0.0004044240340590477, "learning_rate": 4.307228915662651e-06, "loss": -0.0106, "num_tokens": 129533325.0, "reward": 0.9640808582305909, "reward_std": 0.1412920206785202, "rewards/accgated_coverage_0": -0.0022170018404722215, "rewards/accgated_coverage_1": -0.0022170018404722215, "rewards/accgated_coverage_10": -0.0022170018404722215, "rewards/accgated_coverage_15": -0.0022170018404722215, "rewards/accgated_coverage_20": -0.0022170018404722215, "rewards/accgated_coverage_25": -0.0022170018404722215, "rewards/accgated_coverage_5": -0.0022170018404722215, "rewards/accuracy_reward": 0.65234375, "rewards/brier_reward": 0.7770432949066162, "rewards/confidence_uniqueness_reward": 0.938026773929596, "rewards/format_reward": 0.9880208373069763, "rewards/frontier_aurc_reward": -0.0016461270628497005, "rewards/frontier_ece_reward": 0.011523347347974777, "rewards/frontier_entropy_batch_reward": -0.2718831717967987, "signal/accgated_coverage_0/centered_abs_mean": 0.08875515162944794, "signal/accgated_coverage_0/group_std_mean": 0.11871466189622878, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.008875515405088663, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.008875515405088663, "signal/accgated_coverage_1/centered_abs_mean": 0.08875515162944794, "signal/accgated_coverage_1/group_std_mean": 0.11871466189622878, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.008875515405088663, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.008875515405088663, "signal/accgated_coverage_10/centered_abs_mean": 0.08875515162944794, "signal/accgated_coverage_10/group_std_mean": 0.11871466189622878, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.008875515405088663, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.008875515405088663, "signal/accgated_coverage_15/centered_abs_mean": 0.08875515162944794, "signal/accgated_coverage_15/group_std_mean": 0.11871466189622878, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.008875515405088663, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.008875515405088663, "signal/accgated_coverage_20/centered_abs_mean": 0.08875515162944794, "signal/accgated_coverage_20/group_std_mean": 0.11871466189622878, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.008875515405088663, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.008875515405088663, "signal/accgated_coverage_25/centered_abs_mean": 0.08875515162944794, "signal/accgated_coverage_25/group_std_mean": 0.11871466189622878, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.008875515405088663, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.008875515405088663, "signal/accgated_coverage_5/centered_abs_mean": 0.08875515162944794, "signal/accgated_coverage_5/group_std_mean": 0.11871466189622878, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.008875515405088663, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.008875515405088663, "signal/accuracy_reward/centered_abs_mean": 0.17527669221162795, "signal/accuracy_reward/group_std_mean": 0.23397073447704314, "signal/accuracy_reward/group_zero_std_frac": 0.325, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08763834610581397, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08763834610581397, "signal/advantage_abs_mean": 0.10502037107944488, "signal/advantage_pre_scale_abs_mean": 0.10502037107944488, "signal/advantage_pre_scale_std": 0.15789510905742646, "signal/advantage_std": 0.15789510905742646, "signal/brier_reward/centered_abs_mean": 0.16230989396572112, "signal/brier_reward/group_std_mean": 0.20729315876960755, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0162309892475605, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.0162309892475605, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03273056633770466, "signal/confidence_uniqueness_reward/group_std_mean": 0.05453848391771317, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0032730566337704657, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0032730566337704657, "signal/format_reward/centered_abs_mean": 0.020746527798473836, "signal/format_reward/group_std_mean": 0.040246833488345145, "signal/format_reward/group_zero_std_frac": 0.8305555582046509, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.010373263899236918, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.010373263899236918, "signal/frontier_aurc_reward/centered_abs_mean": 0.0012965922243893147, "signal/frontier_aurc_reward/group_std_mean": 0.0021312762284651397, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.6207403132284525e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.6207403132284525e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.035742347687482835, "signal/frontier_ece_reward/group_std_mean": 0.05180431827902794, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0035742347594350577, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0035742347594350577, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3321096122264862, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4016284167766571, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03321096152067184, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03321096152067184, "step": 65 }, { "calibration/aurc": 0.2615254020463663, "calibration/batch_distribution_entropy": 0.9140696171294641, "calibration/buffer_distribution_entropy": 0.8353317778810793, "calibration/confidence_entropy": 0.4293006215562638, "calibration/coverage@0%": 0.006300687857756918, "calibration/coverage@1%": 0.006300687857756918, "calibration/coverage@10%": 0.1367663808861394, "calibration/coverage@15%": 0.2703061500937195, "calibration/coverage@20%": 0.32851268883109996, "calibration/coverage@25%": 0.3903056792973686, "calibration/coverage@30%": 0.6369962387631143, "calibration/coverage@5%": 0.07840595101565165, "calibration/ece": 0.1320950617526681, "calibration/mean_confidence": 0.6382482101514636, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011892361111111093, "completions/max_length": 3493.2, "completions/max_terminated_length": 3493.2, "completions/mean_length": 690.7427001953125, "completions/mean_terminated_length": 699.0555297851563, "completions/min_length": 0.0, "completions/min_terminated_length": 171.0, "epoch": 0.16799790002624967, "grad_norm": 0.00037954424624331295, "learning_rate": 4.156626506024097e-06, "loss": -0.0124, "num_tokens": 140568825.0, "reward": 0.9529558777809143, "reward_std": 0.14111488908529282, "rewards/accgated_coverage_0": 0.01084673785371706, "rewards/accgated_coverage_1": 0.01084673785371706, "rewards/accgated_coverage_10": 0.01084673785371706, "rewards/accgated_coverage_15": 0.01084673785371706, "rewards/accgated_coverage_20": 0.01084673785371706, "rewards/accgated_coverage_25": 0.01084673785371706, "rewards/accgated_coverage_5": 0.01084673785371706, "rewards/accuracy_reward": 0.63125, "rewards/brier_reward": 0.7789486169815063, "rewards/confidence_uniqueness_reward": 0.9293984413146973, "rewards/format_reward": 0.9881076455116272, "rewards/frontier_aurc_reward": -0.001911242282949388, "rewards/frontier_ece_reward": 0.016348773241043092, "rewards/frontier_entropy_batch_reward": -0.3676132559776306, "signal/accgated_coverage_0/centered_abs_mean": 0.07885657548904419, "signal/accgated_coverage_0/group_std_mean": 0.10609191805124282, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.007885657250881195, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.007885657250881195, "signal/accgated_coverage_1/centered_abs_mean": 0.07885657548904419, "signal/accgated_coverage_1/group_std_mean": 0.10609191805124282, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.007885657250881195, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.007885657250881195, "signal/accgated_coverage_10/centered_abs_mean": 0.07885657548904419, "signal/accgated_coverage_10/group_std_mean": 0.10609191805124282, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.007885657250881195, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.007885657250881195, "signal/accgated_coverage_15/centered_abs_mean": 0.07885657548904419, "signal/accgated_coverage_15/group_std_mean": 0.10609191805124282, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.007885657250881195, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.007885657250881195, "signal/accgated_coverage_20/centered_abs_mean": 0.07885657548904419, "signal/accgated_coverage_20/group_std_mean": 0.10609191805124282, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.007885657250881195, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.007885657250881195, "signal/accgated_coverage_25/centered_abs_mean": 0.07885657548904419, "signal/accgated_coverage_25/group_std_mean": 0.10609191805124282, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.007885657250881195, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.007885657250881195, "signal/accgated_coverage_5/centered_abs_mean": 0.07885657548904419, "signal/accgated_coverage_5/group_std_mean": 0.10609191805124282, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.007885657250881195, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.007885657250881195, "signal/accuracy_reward/centered_abs_mean": 0.18004557192325593, "signal/accuracy_reward/group_std_mean": 0.23771512806415557, "signal/accuracy_reward/group_zero_std_frac": 0.3222222298383713, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09002278596162797, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09002278596162797, "signal/advantage_abs_mean": 0.10413759350776672, "signal/advantage_pre_scale_abs_mean": 0.10413759350776672, "signal/advantage_pre_scale_std": 0.16083113849163055, "signal/advantage_std": 0.16083113849163055, "signal/brier_reward/centered_abs_mean": 0.16419869065284728, "signal/brier_reward/group_std_mean": 0.2109546959400177, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016419869475066663, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.016419869475066663, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03784245103597641, "signal/confidence_uniqueness_reward/group_std_mean": 0.06083763241767883, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003784245066344738, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003784245066344738, "signal/format_reward/centered_abs_mean": 0.020556640811264515, "signal/format_reward/group_std_mean": 0.040697479248046876, "signal/format_reward/group_zero_std_frac": 0.8250000238418579, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.010278320405632257, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.010278320405632257, "signal/frontier_aurc_reward/centered_abs_mean": 0.0018522955011576413, "signal/frontier_aurc_reward/group_std_mean": 0.002898959442973137, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.3153694564825854e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.3153694564825854e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.03703445121645928, "signal/frontier_ece_reward/group_std_mean": 0.053357198089361194, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0037034451961517334, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0037034451961517334, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3631279289722443, "signal/frontier_entropy_batch_reward/group_std_mean": 0.43171375393867495, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03631279319524765, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03631279319524765, "step": 70 }, { "calibration/aurc": 0.22257555095357223, "calibration/batch_distribution_entropy": 0.9400871823295274, "calibration/buffer_distribution_entropy": 0.8464505395459649, "calibration/confidence_entropy": 0.4936839122169815, "calibration/coverage@0%": 0.016386362001433938, "calibration/coverage@1%": 0.016386362001433938, "calibration/coverage@10%": 0.24796221501980664, "calibration/coverage@15%": 0.33072087018791063, "calibration/coverage@20%": 0.4732882211713065, "calibration/coverage@25%": 0.649742582351726, "calibration/coverage@30%": 0.7411867364746947, "calibration/coverage@5%": 0.11962625701455729, "calibration/ece": 0.14055367731730012, "calibration/mean_confidence": 0.6228957550672582, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008072916666666697, "completions/max_length": 2918.4, "completions/max_terminated_length": 2918.4, "completions/mean_length": 692.5636352539062, "completions/mean_terminated_length": 698.1532470703125, "completions/min_length": 0.0, "completions/min_terminated_length": 199.6, "epoch": 0.17999775002812465, "grad_norm": 0.00042614529957063496, "learning_rate": 4.006024096385543e-06, "loss": -0.0077, "num_tokens": 151612054.0, "reward": 0.9811522126197815, "reward_std": 0.1300223708152771, "rewards/accgated_coverage_0": 0.0006343376822769642, "rewards/accgated_coverage_1": 0.0006343376822769642, "rewards/accgated_coverage_10": 0.0006343376822769642, "rewards/accgated_coverage_15": 0.0006343376822769642, "rewards/accgated_coverage_20": 0.0006343376822769642, "rewards/accgated_coverage_25": 0.0006343376822769642, "rewards/accgated_coverage_5": 0.0006343376822769642, "rewards/accuracy_reward": 0.6845486164093018, "rewards/brier_reward": 0.7983974099159241, "rewards/confidence_uniqueness_reward": 0.9372838854789733, "rewards/format_reward": 0.9917534708976745, "rewards/frontier_aurc_reward": -0.0014764688210561871, "rewards/frontier_ece_reward": 0.01250108890235424, "rewards/frontier_entropy_batch_reward": -0.32242658734321594, "signal/accgated_coverage_0/centered_abs_mean": 0.08589751869440079, "signal/accgated_coverage_0/group_std_mean": 0.11587843298912048, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.008589751925319434, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.008589751925319434, "signal/accgated_coverage_1/centered_abs_mean": 0.08589751869440079, "signal/accgated_coverage_1/group_std_mean": 0.11587843298912048, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.008589751925319434, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.008589751925319434, "signal/accgated_coverage_10/centered_abs_mean": 0.08589751869440079, "signal/accgated_coverage_10/group_std_mean": 0.11587843298912048, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.008589751925319434, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.008589751925319434, "signal/accgated_coverage_15/centered_abs_mean": 0.08589751869440079, "signal/accgated_coverage_15/group_std_mean": 0.11587843298912048, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.008589751925319434, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.008589751925319434, "signal/accgated_coverage_20/centered_abs_mean": 0.08589751869440079, "signal/accgated_coverage_20/group_std_mean": 0.11587843298912048, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.008589751925319434, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.008589751925319434, "signal/accgated_coverage_25/centered_abs_mean": 0.08589751869440079, "signal/accgated_coverage_25/group_std_mean": 0.11587843298912048, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.008589751925319434, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.008589751925319434, "signal/accgated_coverage_5/centered_abs_mean": 0.08589751869440079, "signal/accgated_coverage_5/group_std_mean": 0.11587843298912048, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.008589751925319434, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.008589751925319434, "signal/accuracy_reward/centered_abs_mean": 0.1838541656732559, "signal/accuracy_reward/group_std_mean": 0.23774852454662324, "signal/accuracy_reward/group_zero_std_frac": 0.3416666716337204, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09192708283662795, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09192708283662795, "signal/advantage_abs_mean": 0.0963760793209076, "signal/advantage_pre_scale_abs_mean": 0.0963760793209076, "signal/advantage_pre_scale_std": 0.15004458129405976, "signal/advantage_std": 0.15004458129405976, "signal/brier_reward/centered_abs_mean": 0.14227010905742646, "signal/brier_reward/group_std_mean": 0.1828959047794342, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014227011241018772, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014227011241018772, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.029756984487175942, "signal/confidence_uniqueness_reward/group_std_mean": 0.050496813654899594, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002975698420777917, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002975698420777917, "signal/format_reward/centered_abs_mean": 0.015228949673473834, "signal/format_reward/group_std_mean": 0.03322426415979862, "signal/format_reward/group_zero_std_frac": 0.85, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.007614474836736917, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.007614474836736917, "signal/frontier_aurc_reward/centered_abs_mean": 0.001322699082084, "signal/frontier_aurc_reward/group_std_mean": 0.0020757037913426758, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.653373910812661e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.653373910812661e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.03186420500278473, "signal/frontier_ece_reward/group_std_mean": 0.046358488500118256, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.003186420677229762, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.003186420677229762, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33970091938972474, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4100371837615967, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03397009335458279, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03397009335458279, "step": 75 }, { "calibration/aurc": 0.18373823770605507, "calibration/batch_distribution_entropy": 0.9194311712417902, "calibration/buffer_distribution_entropy": 0.8560351604828632, "calibration/confidence_entropy": 0.4625009964263458, "calibration/coverage@0%": 0.049749790760420874, "calibration/coverage@1%": 0.08692256562953082, "calibration/coverage@10%": 0.34027520819356083, "calibration/coverage@15%": 0.3944861092123807, "calibration/coverage@20%": 0.6651673002075583, "calibration/coverage@25%": 0.7554071648312486, "calibration/coverage@30%": 0.8342456742456743, "calibration/coverage@5%": 0.10103960315134583, "calibration/ece": 0.14968959270054943, "calibration/mean_confidence": 0.6513386481500065, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013368055555555558, "completions/max_length": 3756.6, "completions/max_terminated_length": 3756.6, "completions/mean_length": 750.0751831054688, "completions/mean_terminated_length": 760.394775390625, "completions/min_length": 0.0, "completions/min_terminated_length": 220.6, "epoch": 0.19199760002999963, "grad_norm": 0.000407382205594331, "learning_rate": 3.855421686746989e-06, "loss": -0.0129, "num_tokens": 163306200.0, "reward": 0.9649693131446838, "reward_std": 0.1357353910803795, "rewards/accgated_coverage_0": -0.00146528814220801, "rewards/accgated_coverage_1": -0.00146528814220801, "rewards/accgated_coverage_10": -0.00146528814220801, "rewards/accgated_coverage_15": -0.00146528814220801, "rewards/accgated_coverage_20": -0.00146528814220801, "rewards/accgated_coverage_25": -0.00146528814220801, "rewards/accgated_coverage_5": -0.00146528814220801, "rewards/accuracy_reward": 0.6591145753860473, "rewards/brier_reward": 0.7822228908538819, "rewards/confidence_uniqueness_reward": 0.9348131895065308, "rewards/format_reward": 0.9866319537162781, "rewards/frontier_aurc_reward": -0.001650554989464581, "rewards/frontier_ece_reward": 0.011803200282156468, "rewards/frontier_entropy_batch_reward": -0.2974155843257904, "signal/accgated_coverage_0/centered_abs_mean": 0.08280792832374573, "signal/accgated_coverage_0/group_std_mean": 0.11122777611017227, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.008280793204903603, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.008280793204903603, "signal/accgated_coverage_1/centered_abs_mean": 0.08280792832374573, "signal/accgated_coverage_1/group_std_mean": 0.11122777611017227, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.008280793204903603, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.008280793204903603, "signal/accgated_coverage_10/centered_abs_mean": 0.08280792832374573, "signal/accgated_coverage_10/group_std_mean": 0.11122777611017227, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.008280793204903603, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.008280793204903603, "signal/accgated_coverage_15/centered_abs_mean": 0.08280792832374573, "signal/accgated_coverage_15/group_std_mean": 0.11122777611017227, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.008280793204903603, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.008280793204903603, "signal/accgated_coverage_20/centered_abs_mean": 0.08280792832374573, "signal/accgated_coverage_20/group_std_mean": 0.11122777611017227, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.008280793204903603, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.008280793204903603, "signal/accgated_coverage_25/centered_abs_mean": 0.08280792832374573, "signal/accgated_coverage_25/group_std_mean": 0.11122777611017227, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.008280793204903603, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.008280793204903603, "signal/accgated_coverage_5/centered_abs_mean": 0.08280792832374573, "signal/accgated_coverage_5/group_std_mean": 0.11122777611017227, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.008280793204903603, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.008280793204903603, "signal/accuracy_reward/centered_abs_mean": 0.18087565004825593, "signal/accuracy_reward/group_std_mean": 0.24078828990459442, "signal/accuracy_reward/group_zero_std_frac": 0.30555556118488314, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09043782502412796, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09043782502412796, "signal/advantage_abs_mean": 0.10051447302103042, "signal/advantage_pre_scale_abs_mean": 0.10051447302103042, "signal/advantage_pre_scale_std": 0.15557511448860167, "signal/advantage_std": 0.15557511448860167, "signal/brier_reward/centered_abs_mean": 0.14737005531787872, "signal/brier_reward/group_std_mean": 0.1906406193971634, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014737005904316902, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014737005904316902, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03383201137185097, "signal/confidence_uniqueness_reward/group_std_mean": 0.055407488346099855, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003383201127871871, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003383201127871871, "signal/format_reward/centered_abs_mean": 0.02119140587747097, "signal/format_reward/group_std_mean": 0.04045570828020573, "signal/format_reward/group_zero_std_frac": 0.8277777791023254, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.010595702938735485, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.010595702938735485, "signal/frontier_aurc_reward/centered_abs_mean": 0.0014987794915214182, "signal/frontier_aurc_reward/group_std_mean": 0.0023568985518068073, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.873474338935921e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.873474338935921e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.03193121068179607, "signal/frontier_ece_reward/group_std_mean": 0.04668203741312027, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0031931213103234767, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0031931213103234767, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33423511385917665, "signal/frontier_entropy_batch_reward/group_std_mean": 0.40302748084068296, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0334235105663538, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0334235105663538, "step": 80 }, { "calibration/aurc": 0.1659009748605335, "calibration/batch_distribution_entropy": 0.9631188152950273, "calibration/buffer_distribution_entropy": 0.8654873779220141, "calibration/confidence_entropy": 0.47541938570574993, "calibration/coverage@0%": 0.03987973405736563, "calibration/coverage@1%": 0.03987973405736563, "calibration/coverage@10%": 0.2952229479904432, "calibration/coverage@15%": 0.5116889055028054, "calibration/coverage@20%": 0.6731285252313406, "calibration/coverage@25%": 0.7771573541029104, "calibration/coverage@30%": 0.8733169729880256, "calibration/coverage@5%": 0.16991762592040008, "calibration/ece": 0.11026244108539315, "calibration/mean_confidence": 0.5748356978844124, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009201388888888907, "completions/max_length": 3788.2, "completions/max_terminated_length": 3788.2, "completions/mean_length": 749.9548583984375, "completions/mean_terminated_length": 756.955517578125, "completions/min_length": 0.0, "completions/min_terminated_length": 251.6, "epoch": 0.2039974500318746, "grad_norm": 0.0003434408863540739, "learning_rate": 3.7048192771084342e-06, "loss": -0.0101, "num_tokens": 175032880.0, "reward": 0.9802459597587585, "reward_std": 0.12824074923992157, "rewards/accgated_coverage_0": 0.009267809754237532, "rewards/accgated_coverage_1": 0.009267809754237532, "rewards/accgated_coverage_10": 0.009267809754237532, "rewards/accgated_coverage_15": 0.009267809754237532, "rewards/accgated_coverage_20": 0.009267809754237532, "rewards/accgated_coverage_25": 0.009267809754237532, "rewards/accgated_coverage_5": 0.009267809754237532, "rewards/accuracy_reward": 0.6739583373069763, "rewards/brier_reward": 0.8075190901756286, "rewards/confidence_uniqueness_reward": 0.9344035863876343, "rewards/format_reward": 0.990711796283722, "rewards/frontier_aurc_reward": -0.001392999361269176, "rewards/frontier_ece_reward": 0.015480473451316357, "rewards/frontier_entropy_batch_reward": -0.34299505352973936, "signal/accgated_coverage_0/centered_abs_mean": 0.07954660803079605, "signal/accgated_coverage_0/group_std_mean": 0.10623638331890106, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.007954660896211862, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.007954660896211862, "signal/accgated_coverage_1/centered_abs_mean": 0.07954660803079605, "signal/accgated_coverage_1/group_std_mean": 0.10623638331890106, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.007954660896211862, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.007954660896211862, "signal/accgated_coverage_10/centered_abs_mean": 0.07954660803079605, "signal/accgated_coverage_10/group_std_mean": 0.10623638331890106, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.007954660896211862, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.007954660896211862, "signal/accgated_coverage_15/centered_abs_mean": 0.07954660803079605, "signal/accgated_coverage_15/group_std_mean": 0.10623638331890106, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.007954660896211862, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.007954660896211862, "signal/accgated_coverage_20/centered_abs_mean": 0.07954660803079605, "signal/accgated_coverage_20/group_std_mean": 0.10623638331890106, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.007954660896211862, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.007954660896211862, "signal/accgated_coverage_25/centered_abs_mean": 0.07954660803079605, "signal/accgated_coverage_25/group_std_mean": 0.10623638331890106, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.007954660896211862, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.007954660896211862, "signal/accgated_coverage_5/centered_abs_mean": 0.07954660803079605, "signal/accgated_coverage_5/group_std_mean": 0.10623638331890106, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.007954660896211862, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.007954660896211862, "signal/accuracy_reward/centered_abs_mean": 0.17490234673023225, "signal/accuracy_reward/group_std_mean": 0.2357180804014206, "signal/accuracy_reward/group_zero_std_frac": 0.30555556416511537, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08745117336511612, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08745117336511612, "signal/advantage_abs_mean": 0.09189856797456741, "signal/advantage_pre_scale_abs_mean": 0.09189856797456741, "signal/advantage_pre_scale_std": 0.14546703696250915, "signal/advantage_std": 0.14546703696250915, "signal/brier_reward/centered_abs_mean": 0.13718933761119842, "signal/brier_reward/group_std_mean": 0.17911297082901, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013718934170901776, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013718934170901776, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.032110657170414926, "signal/confidence_uniqueness_reward/group_std_mean": 0.05388362035155296, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0032110656145960094, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0032110656145960094, "signal/format_reward/centered_abs_mean": 0.01693250872194767, "signal/format_reward/group_std_mean": 0.03573401048779488, "signal/format_reward/group_zero_std_frac": 0.8416666626930237, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.008466254360973835, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.008466254360973835, "signal/frontier_aurc_reward/centered_abs_mean": 0.0014202272053807975, "signal/frontier_aurc_reward/group_std_mean": 0.0022525871871039273, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.775284035829827e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.775284035829827e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.03186605237424374, "signal/frontier_ece_reward/group_std_mean": 0.0464069627225399, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0031866051722317934, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0031866051722317934, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33895500302314757, "signal/frontier_entropy_batch_reward/group_std_mean": 0.410440057516098, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.033895500004291534, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033895500004291534, "step": 85 }, { "calibration/aurc": 0.12977931025427208, "calibration/batch_distribution_entropy": 0.958446248326358, "calibration/buffer_distribution_entropy": 0.8750197391454562, "calibration/confidence_entropy": 0.47956886261730636, "calibration/coverage@0%": 0.08705992500365711, "calibration/coverage@1%": 0.09907036886788688, "calibration/coverage@10%": 0.47727502918121056, "calibration/coverage@15%": 0.6430634133674967, "calibration/coverage@20%": 0.7598805387698602, "calibration/coverage@25%": 0.8284771858836599, "calibration/coverage@30%": 0.9102535133645633, "calibration/coverage@5%": 0.3457808145752194, "calibration/ece": 0.13963566186460488, "calibration/mean_confidence": 0.5722278866912511, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009114583333333348, "completions/max_length": 3512.2, "completions/max_terminated_length": 3512.2, "completions/mean_length": 737.26015625, "completions/mean_terminated_length": 744.0468505859375, "completions/min_length": 0.0, "completions/min_terminated_length": 217.8, "epoch": 0.2159973000337496, "grad_norm": 0.00042602099711075425, "learning_rate": 3.5542168674698798e-06, "loss": -0.0082, "num_tokens": 186594789.0, "reward": 0.981863534450531, "reward_std": 0.12287124991416931, "rewards/accgated_coverage_0": 0.005918828677386046, "rewards/accgated_coverage_1": 0.005918828677386046, "rewards/accgated_coverage_10": 0.005918828677386046, "rewards/accgated_coverage_15": 0.005918828677386046, "rewards/accgated_coverage_20": 0.005918828677386046, "rewards/accgated_coverage_25": 0.005918828677386046, "rewards/accgated_coverage_5": 0.005918828677386046, "rewards/accuracy_reward": 0.668836796283722, "rewards/brier_reward": 0.7972975611686707, "rewards/confidence_uniqueness_reward": 0.9405464291572571, "rewards/format_reward": 0.9907118082046509, "rewards/frontier_aurc_reward": -0.001252932590432465, "rewards/frontier_ece_reward": 0.013159998878836631, "rewards/frontier_entropy_batch_reward": -0.27138724327087405, "signal/accgated_coverage_0/centered_abs_mean": 0.08956028670072555, "signal/accgated_coverage_0/group_std_mean": 0.11862562745809554, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.008956028707325458, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.008956028707325458, "signal/accgated_coverage_1/centered_abs_mean": 0.08956028670072555, "signal/accgated_coverage_1/group_std_mean": 0.11862562745809554, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.008956028707325458, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.008956028707325458, "signal/accgated_coverage_10/centered_abs_mean": 0.08956028670072555, "signal/accgated_coverage_10/group_std_mean": 0.11862562745809554, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.008956028707325458, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.008956028707325458, "signal/accgated_coverage_15/centered_abs_mean": 0.08956028670072555, "signal/accgated_coverage_15/group_std_mean": 0.11862562745809554, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.008956028707325458, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.008956028707325458, "signal/accgated_coverage_20/centered_abs_mean": 0.08956028670072555, "signal/accgated_coverage_20/group_std_mean": 0.11862562745809554, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.008956028707325458, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.008956028707325458, "signal/accgated_coverage_25/centered_abs_mean": 0.08956028670072555, "signal/accgated_coverage_25/group_std_mean": 0.11862562745809554, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.008956028707325458, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.008956028707325458, "signal/accgated_coverage_5/centered_abs_mean": 0.08956028670072555, "signal/accgated_coverage_5/group_std_mean": 0.11862562745809554, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.008956028707325458, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.008956028707325458, "signal/accuracy_reward/centered_abs_mean": 0.17049153745174409, "signal/accuracy_reward/group_std_mean": 0.22411769330501558, "signal/accuracy_reward/group_zero_std_frac": 0.3694444477558136, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08524576872587204, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08524576872587204, "signal/advantage_abs_mean": 0.08913673162460327, "signal/advantage_pre_scale_abs_mean": 0.08913673162460327, "signal/advantage_pre_scale_std": 0.14024181962013244, "signal/advantage_std": 0.14024181962013244, "signal/brier_reward/centered_abs_mean": 0.14370980560779573, "signal/brier_reward/group_std_mean": 0.18593351542949677, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014370980486273766, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014370980486273766, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.028378911688923834, "signal/confidence_uniqueness_reward/group_std_mean": 0.04979285299777984, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0028378912713378666, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0028378912713378666, "signal/format_reward/centered_abs_mean": 0.01656358502805233, "signal/format_reward/group_std_mean": 0.03597295694053173, "signal/format_reward/group_zero_std_frac": 0.8333333492279053, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.008281792514026165, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.008281792514026165, "signal/frontier_aurc_reward/centered_abs_mean": 0.0012377587612718345, "signal/frontier_aurc_reward/group_std_mean": 0.001967509277164936, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.5471983897441532e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.5471983897441532e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.03037920966744423, "signal/frontier_ece_reward/group_std_mean": 0.043885117024183275, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0030379209667444227, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0030379209667444227, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3221738815307617, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3930954456329346, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03221738450229168, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03221738450229168, "step": 90 }, { "calibration/aurc": 0.18050527287688292, "calibration/batch_distribution_entropy": 0.9483888084359655, "calibration/buffer_distribution_entropy": 0.8852656327097506, "calibration/confidence_entropy": 0.4446431319687152, "calibration/coverage@0%": 0.015151934406954707, "calibration/coverage@1%": 0.015151934406954707, "calibration/coverage@10%": 0.5202712232182465, "calibration/coverage@15%": 0.5869756896666373, "calibration/coverage@20%": 0.6468486084413063, "calibration/coverage@25%": 0.6841459144943153, "calibration/coverage@30%": 0.7356502203434319, "calibration/coverage@5%": 0.22796958061671996, "calibration/ece": 0.15324055615517662, "calibration/mean_confidence": 0.5787011570773354, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006336805555555536, "completions/max_length": 3316.2, "completions/max_terminated_length": 3316.2, "completions/mean_length": 738.9697265625, "completions/mean_terminated_length": 743.6612426757813, "completions/min_length": 0.0, "completions/min_terminated_length": 208.2, "epoch": 0.22799715003562457, "grad_norm": 0.0003902704920619726, "learning_rate": 3.4036144578313257e-06, "loss": -0.0059, "num_tokens": 198199400.0, "reward": 0.977604615688324, "reward_std": 0.12400392889976501, "rewards/accgated_coverage_0": 0.013323387503623963, "rewards/accgated_coverage_1": 0.013323387503623963, "rewards/accgated_coverage_10": 0.013323387503623963, "rewards/accgated_coverage_15": 0.013323387503623963, "rewards/accgated_coverage_20": 0.013323387503623963, "rewards/accgated_coverage_25": 0.013323387503623963, "rewards/accgated_coverage_5": 0.013323387503623963, "rewards/accuracy_reward": 0.6585069417953491, "rewards/brier_reward": 0.807054877281189, "rewards/confidence_uniqueness_reward": 0.9377588510513306, "rewards/format_reward": 0.9935763955116272, "rewards/frontier_aurc_reward": -0.001382858515717089, "rewards/frontier_ece_reward": 0.015744138136506082, "rewards/frontier_entropy_batch_reward": -0.3380190432071686, "signal/accgated_coverage_0/centered_abs_mean": 0.07688793241977691, "signal/accgated_coverage_0/group_std_mean": 0.10520303398370742, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.007688793074339628, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.007688793074339628, "signal/accgated_coverage_1/centered_abs_mean": 0.07688793241977691, "signal/accgated_coverage_1/group_std_mean": 0.10520303398370742, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.007688793074339628, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.007688793074339628, "signal/accgated_coverage_10/centered_abs_mean": 0.07688793241977691, "signal/accgated_coverage_10/group_std_mean": 0.10520303398370742, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.007688793074339628, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.007688793074339628, "signal/accgated_coverage_15/centered_abs_mean": 0.07688793241977691, "signal/accgated_coverage_15/group_std_mean": 0.10520303398370742, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.007688793074339628, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.007688793074339628, "signal/accgated_coverage_20/centered_abs_mean": 0.07688793241977691, "signal/accgated_coverage_20/group_std_mean": 0.10520303398370742, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.007688793074339628, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.007688793074339628, "signal/accgated_coverage_25/centered_abs_mean": 0.07688793241977691, "signal/accgated_coverage_25/group_std_mean": 0.10520303398370742, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.007688793074339628, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.007688793074339628, "signal/accgated_coverage_5/centered_abs_mean": 0.07688793241977691, "signal/accgated_coverage_5/group_std_mean": 0.10520303398370742, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.007688793074339628, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.007688793074339628, "signal/accuracy_reward/centered_abs_mean": 0.16501736044883727, "signal/accuracy_reward/group_std_mean": 0.22163840532302856, "signal/accuracy_reward/group_zero_std_frac": 0.3666666686534882, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08250868022441864, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08250868022441864, "signal/advantage_abs_mean": 0.09041554778814316, "signal/advantage_pre_scale_abs_mean": 0.09041554778814316, "signal/advantage_pre_scale_std": 0.142058926820755, "signal/advantage_std": 0.142058926820755, "signal/brier_reward/centered_abs_mean": 0.13740523755550385, "signal/brier_reward/group_std_mean": 0.1825299233198166, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013740524649620056, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013740524649620056, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.027925553545355798, "signal/confidence_uniqueness_reward/group_std_mean": 0.04514765739440918, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002792555373162031, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002792555373162031, "signal/format_reward/centered_abs_mean": 0.011762152798473835, "signal/format_reward/group_std_mean": 0.025851282477378845, "signal/format_reward/group_zero_std_frac": 0.8805555582046509, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.005881076399236918, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.005881076399236918, "signal/frontier_aurc_reward/centered_abs_mean": 0.0015603850362822414, "signal/frontier_aurc_reward/group_std_mean": 0.0025688192108646035, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.950481346284505e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.950481346284505e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.03039446845650673, "signal/frontier_ece_reward/group_std_mean": 0.04404748827219009, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00303944693878293, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00303944693878293, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.35081249475479126, "signal/frontier_entropy_batch_reward/group_std_mean": 0.42071945071220396, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.035081248730421066, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.035081248730421066, "step": 95 }, { "calibration/aurc": 0.14878210287435184, "calibration/batch_distribution_entropy": 0.9685480405199234, "calibration/buffer_distribution_entropy": 0.8919329426013801, "calibration/confidence_entropy": 0.5030879963092827, "calibration/coverage@0%": 0.050392448689359805, "calibration/coverage@1%": 0.050392448689359805, "calibration/coverage@10%": 0.29977935142486195, "calibration/coverage@15%": 0.6217676940395167, "calibration/coverage@20%": 0.7900059832613835, "calibration/coverage@25%": 0.8822578230367519, "calibration/coverage@30%": 0.9150943263459359, "calibration/coverage@5%": 0.1633913711877651, "calibration/ece": 0.15545339149716966, "calibration/mean_confidence": 0.557937458366312, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008940972222222232, "completions/max_length": 3830.8, "completions/max_terminated_length": 3830.8, "completions/mean_length": 756.8622436523438, "completions/mean_terminated_length": 763.7328247070312, "completions/min_length": 0.0, "completions/min_terminated_length": 223.2, "epoch": 0.23999700003749952, "grad_norm": 0.0003215717733837664, "learning_rate": 3.2530120481927713e-06, "loss": -0.0086, "num_tokens": 210017525.0, "reward": 0.98323655128479, "reward_std": 0.12426994442939758, "rewards/accgated_coverage_0": 0.012122076144441963, "rewards/accgated_coverage_1": 0.012122076144441963, "rewards/accgated_coverage_10": 0.012122076144441963, "rewards/accgated_coverage_15": 0.012122076144441963, "rewards/accgated_coverage_20": 0.012122076144441963, "rewards/accgated_coverage_25": 0.012122076144441963, "rewards/accgated_coverage_5": 0.012122076144441963, "rewards/accuracy_reward": 0.6693576455116272, "rewards/brier_reward": 0.8081021189689637, "rewards/confidence_uniqueness_reward": 0.9393685340881348, "rewards/format_reward": 0.9909722208976746, "rewards/frontier_aurc_reward": -0.0011498184525407852, "rewards/frontier_ece_reward": 0.01175499688833952, "rewards/frontier_entropy_batch_reward": -0.3132203757762909, "signal/accgated_coverage_0/centered_abs_mean": 0.0879080355167389, "signal/accgated_coverage_0/group_std_mean": 0.11624083817005157, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.008790803793817758, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.008790803793817758, "signal/accgated_coverage_1/centered_abs_mean": 0.0879080355167389, "signal/accgated_coverage_1/group_std_mean": 0.11624083817005157, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.008790803793817758, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.008790803793817758, "signal/accgated_coverage_10/centered_abs_mean": 0.0879080355167389, "signal/accgated_coverage_10/group_std_mean": 0.11624083817005157, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.008790803793817758, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.008790803793817758, "signal/accgated_coverage_15/centered_abs_mean": 0.0879080355167389, "signal/accgated_coverage_15/group_std_mean": 0.11624083817005157, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.008790803793817758, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.008790803793817758, "signal/accgated_coverage_20/centered_abs_mean": 0.0879080355167389, "signal/accgated_coverage_20/group_std_mean": 0.11624083817005157, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.008790803793817758, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.008790803793817758, "signal/accgated_coverage_25/centered_abs_mean": 0.0879080355167389, "signal/accgated_coverage_25/group_std_mean": 0.11624083817005157, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.008790803793817758, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.008790803793817758, "signal/accgated_coverage_5/centered_abs_mean": 0.0879080355167389, "signal/accgated_coverage_5/group_std_mean": 0.11624083817005157, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.008790803793817758, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.008790803793817758, "signal/accuracy_reward/centered_abs_mean": 0.17026366889476777, "signal/accuracy_reward/group_std_mean": 0.22329876124858855, "signal/accuracy_reward/group_zero_std_frac": 0.36944444179534913, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08513183444738388, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08513183444738388, "signal/advantage_abs_mean": 0.09176785200834274, "signal/advantage_pre_scale_abs_mean": 0.09176785200834274, "signal/advantage_pre_scale_std": 0.14243557453155517, "signal/advantage_std": 0.14243557453155517, "signal/brier_reward/centered_abs_mean": 0.13790313005447388, "signal/brier_reward/group_std_mean": 0.1790826916694641, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013790314458310604, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013790314458310604, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.028661540523171426, "signal/confidence_uniqueness_reward/group_std_mean": 0.04833717867732048, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0028661541175097225, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0028661541175097225, "signal/format_reward/centered_abs_mean": 0.016037326119840146, "signal/format_reward/group_std_mean": 0.033191120624542235, "signal/format_reward/group_zero_std_frac": 0.8527777791023254, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.008018663059920073, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.008018663059920073, "signal/frontier_aurc_reward/centered_abs_mean": 0.0010956939542666078, "signal/frontier_aurc_reward/group_std_mean": 0.001799008040688932, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.369617530144751e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.369617530144751e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.025799740105867386, "signal/frontier_ece_reward/group_std_mean": 0.037496446073055266, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002579974289983511, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002579974289983511, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3498570203781128, "signal/frontier_entropy_batch_reward/group_std_mean": 0.417994225025177, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03498570322990417, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03498570322990417, "step": 100 }, { "epoch": 0.23999700003749952, "eval_calibration/aurc": 0.1517072771943686, "eval_calibration/batch_distribution_entropy": 0.8719902280793708, "eval_calibration/buffer_distribution_entropy": 0.8971747945256113, "eval_calibration/confidence_entropy": 0.43289156185263855, "eval_calibration/coverage@0%": 0.234375, "eval_calibration/coverage@1%": 0.234375, "eval_calibration/coverage@10%": 0.4427083333333333, "eval_calibration/coverage@15%": 0.625, "eval_calibration/coverage@20%": 0.75, "eval_calibration/coverage@25%": 0.8854166666666666, "eval_calibration/coverage@30%": 0.9322916666666666, "eval_calibration/coverage@5%": 0.2708333333333333, "eval_calibration/ece": 0.17274301417681212, "eval_calibration/mean_confidence": 0.6774464357123575, "eval_completions/clipped_ratio": 0.005208333333333333, "eval_completions/max_length": 2554.0, "eval_completions/max_terminated_length": 2554.0, "eval_completions/mean_length": 727.868418375651, "eval_completions/mean_terminated_length": 731.6888529459635, "eval_completions/min_length": 95.16666666666667, "eval_completions/min_terminated_length": 270.8333333333333, "eval_loss": 0.0, "eval_num_tokens": 210017525.0, "eval_reward": 0.9147416253884634, "eval_reward_std": 0.2155863419175148, "eval_rewards/accgated_coverage_0": 0.02142564587605496, "eval_rewards/accgated_coverage_1": 0.02142564587605496, "eval_rewards/accgated_coverage_10": 0.02142564587605496, "eval_rewards/accgated_coverage_15": 0.02142564587605496, "eval_rewards/accgated_coverage_20": 0.02142564587605496, "eval_rewards/accgated_coverage_25": 0.02142564587605496, "eval_rewards/accgated_coverage_5": 0.02142564587605496, "eval_rewards/accuracy_reward": 0.6623263855775198, "eval_rewards/brier_reward": 0.821544220050176, "eval_rewards/confidence_uniqueness_reward": 0.8742929995059967, "eval_rewards/format_reward": 0.9930555522441864, "eval_rewards/frontier_aurc_reward": -0.0016115416074171662, "eval_rewards/frontier_ece_reward": 0.017946766689419746, "eval_rewards/frontier_entropy_batch_reward": -0.9930555522441864, "eval_runtime": 198.4437, "eval_samples_per_second": 5.039, "eval_signal/accgated_coverage_0/centered_abs_mean": 0.09550586342811584, "eval_signal/accgated_coverage_0/group_std_mean": 0.14860529700915018, "eval_signal/accgated_coverage_0/group_zero_std_frac": 0.0, "eval_signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.009550586187591156, "eval_signal/accgated_coverage_0/weight": 0.10000000149011612, "eval_signal/accgated_coverage_0/weighted_centered_abs_mean": 0.009550586187591156, "eval_signal/accgated_coverage_1/centered_abs_mean": 0.09550586342811584, "eval_signal/accgated_coverage_1/group_std_mean": 0.14860529700915018, "eval_signal/accgated_coverage_1/group_zero_std_frac": 0.0, "eval_signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.009550586187591156, "eval_signal/accgated_coverage_1/weight": 0.10000000149011612, "eval_signal/accgated_coverage_1/weighted_centered_abs_mean": 0.009550586187591156, "eval_signal/accgated_coverage_10/centered_abs_mean": 0.09550586342811584, "eval_signal/accgated_coverage_10/group_std_mean": 0.14860529700915018, "eval_signal/accgated_coverage_10/group_zero_std_frac": 0.0, "eval_signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.009550586187591156, "eval_signal/accgated_coverage_10/weight": 0.10000000149011612, "eval_signal/accgated_coverage_10/weighted_centered_abs_mean": 0.009550586187591156, "eval_signal/accgated_coverage_15/centered_abs_mean": 0.09550586342811584, "eval_signal/accgated_coverage_15/group_std_mean": 0.14860529700915018, "eval_signal/accgated_coverage_15/group_zero_std_frac": 0.0, "eval_signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.009550586187591156, "eval_signal/accgated_coverage_15/weight": 0.10000000149011612, "eval_signal/accgated_coverage_15/weighted_centered_abs_mean": 0.009550586187591156, "eval_signal/accgated_coverage_20/centered_abs_mean": 0.09550586342811584, "eval_signal/accgated_coverage_20/group_std_mean": 0.14860529700915018, "eval_signal/accgated_coverage_20/group_zero_std_frac": 0.0, "eval_signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.009550586187591156, "eval_signal/accgated_coverage_20/weight": 0.10000000149011612, "eval_signal/accgated_coverage_20/weighted_centered_abs_mean": 0.009550586187591156, "eval_signal/accgated_coverage_25/centered_abs_mean": 0.09550586342811584, "eval_signal/accgated_coverage_25/group_std_mean": 0.14860529700915018, "eval_signal/accgated_coverage_25/group_zero_std_frac": 0.0, "eval_signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.009550586187591156, "eval_signal/accgated_coverage_25/weight": 0.10000000149011612, "eval_signal/accgated_coverage_25/weighted_centered_abs_mean": 0.009550586187591156, "eval_signal/accgated_coverage_5/centered_abs_mean": 0.09550586342811584, "eval_signal/accgated_coverage_5/group_std_mean": 0.14860529700915018, "eval_signal/accgated_coverage_5/group_zero_std_frac": 0.0, "eval_signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.009550586187591156, "eval_signal/accgated_coverage_5/weight": 0.10000000149011612, "eval_signal/accgated_coverage_5/weighted_centered_abs_mean": 0.009550586187591156, "eval_signal/accuracy_reward/centered_abs_mean": 0.43505859375, "eval_signal/accuracy_reward/group_std_mean": 0.472920889655749, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.217529296875, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.217529296875, "eval_signal/advantage_abs_mean": 0.18196682880322138, "eval_signal/advantage_pre_scale_abs_mean": 0.18196682880322138, "eval_signal/advantage_pre_scale_std": 0.21450272450844446, "eval_signal/advantage_std": 0.21450272450844446, "eval_signal/brier_reward/centered_abs_mean": 0.19742226352294287, "eval_signal/brier_reward/group_std_mean": 0.2611571674545606, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01974222684899966, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.01974222684899966, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.056711938232183456, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.08472888544201851, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00567119390082856, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00567119390082856, "eval_signal/format_reward/centered_abs_mean": 0.013454860697189966, "eval_signal/format_reward/group_std_mean": 0.03928371022144953, "eval_signal/format_reward/group_zero_std_frac": 0.7777778009573618, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.006727430348594983, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.006727430348594983, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0027537442122896514, "eval_signal/frontier_aurc_reward/group_std_mean": 0.004981053527444601, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.442180301741852e-05, "eval_signal/frontier_aurc_reward/weight": 0.012500000186264515, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.442180301741852e-05, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.03306501638144255, "eval_signal/frontier_ece_reward/group_std_mean": 0.05123907576004664, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0033065018554528556, "eval_signal/frontier_ece_reward/weight": 0.10000000149011612, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0033065018554528556, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.013454860697189966, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.03928371022144953, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.7777778009573618, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0013454861279266577, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0013454861279266577, "eval_steps_per_second": 0.03, "step": 100 }, { "calibration/aurc": 0.27191994998532804, "calibration/batch_distribution_entropy": 0.9110664528383987, "calibration/buffer_distribution_entropy": 0.8976974161527924, "calibration/confidence_entropy": 0.4418744288231554, "calibration/coverage@0%": 0.07043912039759519, "calibration/coverage@1%": 0.0741428241012989, "calibration/coverage@10%": 0.20709950140523467, "calibration/coverage@15%": 0.2794609689211093, "calibration/coverage@20%": 0.3744648370189722, "calibration/coverage@25%": 0.44615183007087034, "calibration/coverage@30%": 0.5388811184790099, "calibration/coverage@5%": 0.15804877406469012, "calibration/ece": 0.14876870881845822, "calibration/mean_confidence": 0.6542153198229441, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012760416666666653, "completions/max_length": 3785.4, "completions/max_terminated_length": 3785.4, "completions/mean_length": 725.8693725585938, "completions/mean_terminated_length": 735.1307983398438, "completions/min_length": 0.0, "completions/min_terminated_length": 203.8, "epoch": 0.2519968500393745, "grad_norm": 0.0007082828669808805, "learning_rate": 3.1024096385542172e-06, "loss": -0.0143, "num_tokens": 221456404.0, "reward": 0.9740729570388794, "reward_std": 0.1306391790509224, "rewards/accgated_coverage_0": 0.01679096817970276, "rewards/accgated_coverage_1": 0.01679096817970276, "rewards/accgated_coverage_10": 0.01679096817970276, "rewards/accgated_coverage_15": 0.01679096817970276, "rewards/accgated_coverage_20": 0.01679096817970276, "rewards/accgated_coverage_25": 0.01679096817970276, "rewards/accgated_coverage_5": 0.01679096817970276, "rewards/accuracy_reward": 0.6658854126930237, "rewards/brier_reward": 0.8182917714118958, "rewards/confidence_uniqueness_reward": 0.92508784532547, "rewards/format_reward": 0.9872395753860473, "rewards/frontier_aurc_reward": -0.0014265108155086636, "rewards/frontier_ece_reward": 0.017224435694515705, "rewards/frontier_entropy_batch_reward": -0.40285754203796387, "signal/accgated_coverage_0/centered_abs_mean": 0.06954341232776642, "signal/accgated_coverage_0/group_std_mean": 0.09541115164756775, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.006954341474920511, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.006954341474920511, "signal/accgated_coverage_1/centered_abs_mean": 0.06954341232776642, "signal/accgated_coverage_1/group_std_mean": 0.09541115164756775, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.006954341474920511, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.006954341474920511, "signal/accgated_coverage_10/centered_abs_mean": 0.06954341232776642, "signal/accgated_coverage_10/group_std_mean": 0.09541115164756775, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.006954341474920511, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.006954341474920511, "signal/accgated_coverage_15/centered_abs_mean": 0.06954341232776642, "signal/accgated_coverage_15/group_std_mean": 0.09541115164756775, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.006954341474920511, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.006954341474920511, "signal/accgated_coverage_20/centered_abs_mean": 0.06954341232776642, "signal/accgated_coverage_20/group_std_mean": 0.09541115164756775, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.006954341474920511, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.006954341474920511, "signal/accgated_coverage_25/centered_abs_mean": 0.06954341232776642, "signal/accgated_coverage_25/group_std_mean": 0.09541115164756775, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.006954341474920511, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.006954341474920511, "signal/accgated_coverage_5/centered_abs_mean": 0.06954341232776642, "signal/accgated_coverage_5/group_std_mean": 0.09541115164756775, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.006954341474920511, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.006954341474920511, "signal/accuracy_reward/centered_abs_mean": 0.16107313334941864, "signal/accuracy_reward/group_std_mean": 0.21490317285060884, "signal/accuracy_reward/group_zero_std_frac": 0.38888888955116274, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08053656667470932, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08053656667470932, "signal/advantage_abs_mean": 0.0953233152627945, "signal/advantage_pre_scale_abs_mean": 0.0953233152627945, "signal/advantage_pre_scale_std": 0.15531104803085327, "signal/advantage_std": 0.15531104803085327, "signal/brier_reward/centered_abs_mean": 0.1313171371817589, "signal/brier_reward/group_std_mean": 0.1736193746328354, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013131714053452015, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013131714053452015, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03815034255385399, "signal/confidence_uniqueness_reward/group_std_mean": 0.06041007116436958, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0038150343578308822, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0038150343578308822, "signal/format_reward/centered_abs_mean": 0.021175130270421505, "signal/format_reward/group_std_mean": 0.04042814746499061, "signal/format_reward/group_zero_std_frac": 0.8333333373069763, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.010587565135210752, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.010587565135210752, "signal/frontier_aurc_reward/centered_abs_mean": 0.001724409847520292, "signal/frontier_aurc_reward/group_std_mean": 0.002808917826041579, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.1555123385041953e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.1555123385041953e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.029964320734143256, "signal/frontier_ece_reward/group_std_mean": 0.04247441366314888, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002996431989595294, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002996431989595294, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3408441662788391, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4115506410598755, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03408441767096519, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03408441767096519, "step": 105 }, { "calibration/aurc": 0.12744663592497546, "calibration/batch_distribution_entropy": 0.9519339153112962, "calibration/buffer_distribution_entropy": 0.9010714042039174, "calibration/confidence_entropy": 0.4827597225891366, "calibration/coverage@0%": 0.11921970763456483, "calibration/coverage@1%": 0.11921970763456483, "calibration/coverage@10%": 0.4370017656603837, "calibration/coverage@15%": 0.6632638952974681, "calibration/coverage@20%": 0.7718870891274262, "calibration/coverage@25%": 0.8532847355064792, "calibration/coverage@30%": 0.9258258532038015, "calibration/coverage@5%": 0.29218016594856466, "calibration/ece": 0.13218755371540838, "calibration/mean_confidence": 0.5731048630256981, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011024305555555558, "completions/max_length": 3485.8, "completions/max_terminated_length": 3485.8, "completions/mean_length": 725.3192749023438, "completions/mean_terminated_length": 733.386181640625, "completions/min_length": 0.0, "completions/min_terminated_length": 225.2, "epoch": 0.2639967000412495, "grad_norm": 0.0004119804943911731, "learning_rate": 2.9518072289156627e-06, "loss": -0.0112, "num_tokens": 232920530.0, "reward": 0.9900946617126465, "reward_std": 0.1217676728963852, "rewards/accgated_coverage_0": 0.005296125635504722, "rewards/accgated_coverage_1": 0.005296125635504722, "rewards/accgated_coverage_10": 0.005296125635504722, "rewards/accgated_coverage_15": 0.005296125635504722, "rewards/accgated_coverage_20": 0.005296125635504722, "rewards/accgated_coverage_25": 0.005296125635504722, "rewards/accgated_coverage_5": 0.005296125635504722, "rewards/accuracy_reward": 0.6916666626930237, "rewards/brier_reward": 0.8105852246284485, "rewards/confidence_uniqueness_reward": 0.9374894022941589, "rewards/format_reward": 0.9888888955116272, "rewards/frontier_aurc_reward": -0.0009289152920246124, "rewards/frontier_ece_reward": 0.009040744509547949, "rewards/frontier_entropy_batch_reward": -0.2959034085273743, "signal/accgated_coverage_0/centered_abs_mean": 0.09564257562160491, "signal/accgated_coverage_0/group_std_mean": 0.12577018290758132, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.009564257692545652, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.009564257692545652, "signal/accgated_coverage_1/centered_abs_mean": 0.09564257562160491, "signal/accgated_coverage_1/group_std_mean": 0.12577018290758132, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.009564257692545652, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.009564257692545652, "signal/accgated_coverage_10/centered_abs_mean": 0.09564257562160491, "signal/accgated_coverage_10/group_std_mean": 0.12577018290758132, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.009564257692545652, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.009564257692545652, "signal/accgated_coverage_15/centered_abs_mean": 0.09564257562160491, "signal/accgated_coverage_15/group_std_mean": 0.12577018290758132, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.009564257692545652, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.009564257692545652, "signal/accgated_coverage_20/centered_abs_mean": 0.09564257562160491, "signal/accgated_coverage_20/group_std_mean": 0.12577018290758132, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.009564257692545652, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.009564257692545652, "signal/accgated_coverage_25/centered_abs_mean": 0.09564257562160491, "signal/accgated_coverage_25/group_std_mean": 0.12577018290758132, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.009564257692545652, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.009564257692545652, "signal/accgated_coverage_5/centered_abs_mean": 0.09564257562160491, "signal/accgated_coverage_5/group_std_mean": 0.12577018290758132, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.009564257692545652, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.009564257692545652, "signal/accuracy_reward/centered_abs_mean": 0.17435981035232545, "signal/accuracy_reward/group_std_mean": 0.23566536605358124, "signal/accuracy_reward/group_zero_std_frac": 0.3083333343267441, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08717990517616273, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08717990517616273, "signal/advantage_abs_mean": 0.08975694328546524, "signal/advantage_pre_scale_abs_mean": 0.08975694328546524, "signal/advantage_pre_scale_std": 0.1408482313156128, "signal/advantage_std": 0.1408482313156128, "signal/brier_reward/centered_abs_mean": 0.1274586006999016, "signal/brier_reward/group_std_mean": 0.16526381373405458, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012745860032737255, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012745860032737255, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03015372306108475, "signal/confidence_uniqueness_reward/group_std_mean": 0.04796536043286324, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0030153723899275066, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0030153723899275066, "signal/format_reward/centered_abs_mean": 0.01713324636220932, "signal/format_reward/group_std_mean": 0.03228283934295177, "signal/format_reward/group_zero_std_frac": 0.8638888955116272, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00856662318110466, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00856662318110466, "signal/frontier_aurc_reward/centered_abs_mean": 0.0008368753246031701, "signal/frontier_aurc_reward/group_std_mean": 0.0013955856789834797, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.046094139383058e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.046094139383058e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.023550980538129807, "signal/frontier_ece_reward/group_std_mean": 0.033878795057535174, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002355098072439432, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002355098072439432, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3411864399909973, "signal/frontier_entropy_batch_reward/group_std_mean": 0.41080782413482664, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.034118644148111346, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.034118644148111346, "step": 110 }, { "calibration/aurc": 0.2565453334486796, "calibration/batch_distribution_entropy": 0.9575537318941236, "calibration/buffer_distribution_entropy": 0.9060671449072653, "calibration/confidence_entropy": 0.46890768621541457, "calibration/coverage@0%": 0.019065294749141152, "calibration/coverage@1%": 0.019065294749141152, "calibration/coverage@10%": 0.15182530570872965, "calibration/coverage@15%": 0.33773805086252534, "calibration/coverage@20%": 0.50743781469926, "calibration/coverage@25%": 0.6094538999487293, "calibration/coverage@30%": 0.6899513244607858, "calibration/coverage@5%": 0.052038626270536105, "calibration/ece": 0.16751469598705682, "calibration/mean_confidence": 0.5752961614557014, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013888888888888885, "completions/max_length": 3733.2, "completions/max_terminated_length": 3733.2, "completions/mean_length": 734.7553833007812, "completions/mean_terminated_length": 745.2282348632813, "completions/min_length": 0.0, "completions/min_terminated_length": 218.4, "epoch": 0.27599655004312446, "grad_norm": 0.00032408704282715917, "learning_rate": 2.8012048192771087e-06, "loss": -0.0141, "num_tokens": 244464112.0, "reward": 0.973526930809021, "reward_std": 0.12945592552423477, "rewards/accgated_coverage_0": 0.019186272844672204, "rewards/accgated_coverage_1": 0.019186272844672204, "rewards/accgated_coverage_10": 0.019186272844672204, "rewards/accgated_coverage_15": 0.019186272844672204, "rewards/accgated_coverage_20": 0.019186272844672204, "rewards/accgated_coverage_25": 0.019186272844672204, "rewards/accgated_coverage_5": 0.019186272844672204, "rewards/accuracy_reward": 0.6506944537162781, "rewards/brier_reward": 0.8081199884414673, "rewards/confidence_uniqueness_reward": 0.9315792918205261, "rewards/format_reward": 0.9860243082046509, "rewards/frontier_aurc_reward": -0.0013923029648140073, "rewards/frontier_ece_reward": 0.013070075027644634, "rewards/frontier_entropy_batch_reward": -0.3352237045764923, "signal/accgated_coverage_0/centered_abs_mean": 0.08040238320827484, "signal/accgated_coverage_0/group_std_mean": 0.10813496261835098, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.008040238078683615, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.008040238078683615, "signal/accgated_coverage_1/centered_abs_mean": 0.08040238320827484, "signal/accgated_coverage_1/group_std_mean": 0.10813496261835098, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.008040238078683615, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.008040238078683615, "signal/accgated_coverage_10/centered_abs_mean": 0.08040238320827484, "signal/accgated_coverage_10/group_std_mean": 0.10813496261835098, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.008040238078683615, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.008040238078683615, "signal/accgated_coverage_15/centered_abs_mean": 0.08040238320827484, "signal/accgated_coverage_15/group_std_mean": 0.10813496261835098, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.008040238078683615, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.008040238078683615, "signal/accgated_coverage_20/centered_abs_mean": 0.08040238320827484, "signal/accgated_coverage_20/group_std_mean": 0.10813496261835098, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.008040238078683615, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.008040238078683615, "signal/accgated_coverage_25/centered_abs_mean": 0.08040238320827484, "signal/accgated_coverage_25/group_std_mean": 0.10813496261835098, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.008040238078683615, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.008040238078683615, "signal/accgated_coverage_5/centered_abs_mean": 0.08040238320827484, "signal/accgated_coverage_5/group_std_mean": 0.10813496261835098, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.008040238078683615, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.008040238078683615, "signal/accuracy_reward/centered_abs_mean": 0.1705186665058136, "signal/accuracy_reward/group_std_mean": 0.22422258257865907, "signal/accuracy_reward/group_zero_std_frac": 0.3638888955116272, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0852593332529068, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0852593332529068, "signal/advantage_abs_mean": 0.09446865022182464, "signal/advantage_pre_scale_abs_mean": 0.09446865022182464, "signal/advantage_pre_scale_std": 0.15316152572631836, "signal/advantage_std": 0.15316152572631836, "signal/brier_reward/centered_abs_mean": 0.140705406665802, "signal/brier_reward/group_std_mean": 0.1837473154067993, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01407054141163826, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01407054141163826, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03692381903529167, "signal/confidence_uniqueness_reward/group_std_mean": 0.060953890532255174, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0036923819687217476, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0036923819687217476, "signal/format_reward/centered_abs_mean": 0.02344292551279068, "signal/format_reward/group_std_mean": 0.045116296410560607, "signal/format_reward/group_zero_std_frac": 0.8111111044883728, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01172146275639534, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01172146275639534, "signal/frontier_aurc_reward/centered_abs_mean": 0.0015263660810887814, "signal/frontier_aurc_reward/group_std_mean": 0.002488213311880827, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.907957521325443e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.907957521325443e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.02705363780260086, "signal/frontier_ece_reward/group_std_mean": 0.03860641121864319, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0027053637430071832, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0027053637430071832, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.34538384675979616, "signal/frontier_entropy_batch_reward/group_std_mean": 0.41308689713478086, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03453838601708412, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03453838601708412, "step": 115 }, { "calibration/aurc": 0.24837072760216533, "calibration/batch_distribution_entropy": 0.9368503432052853, "calibration/buffer_distribution_entropy": 0.9108657331287386, "calibration/confidence_entropy": 0.4455018146660613, "calibration/coverage@0%": 0.04905653133468686, "calibration/coverage@1%": 0.04905653133468686, "calibration/coverage@10%": 0.3206688968562649, "calibration/coverage@15%": 0.42620434852377453, "calibration/coverage@20%": 0.5009301871200499, "calibration/coverage@25%": 0.543208919992705, "calibration/coverage@30%": 0.5796971706454465, "calibration/coverage@5%": 0.18065346401915186, "calibration/ece": 0.15013938661825454, "calibration/mean_confidence": 0.6012813334516616, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009201388888888907, "completions/max_length": 3410.2, "completions/max_terminated_length": 3410.2, "completions/mean_length": 718.92509765625, "completions/mean_terminated_length": 725.5808715820312, "completions/min_length": 0.0, "completions/min_terminated_length": 233.2, "epoch": 0.28799640004499943, "grad_norm": 0.0007875562296248972, "learning_rate": 2.6506024096385547e-06, "loss": -0.009, "num_tokens": 255827985.0, "reward": 0.9858316063880921, "reward_std": 0.1202399954199791, "rewards/accgated_coverage_0": 0.017084382567554714, "rewards/accgated_coverage_1": 0.017084382567554714, "rewards/accgated_coverage_10": 0.017084382567554714, "rewards/accgated_coverage_15": 0.017084382567554714, "rewards/accgated_coverage_20": 0.017084382567554714, "rewards/accgated_coverage_25": 0.017084382567554714, "rewards/accgated_coverage_5": 0.017084382567554714, "rewards/accuracy_reward": 0.6697048544883728, "rewards/brier_reward": 0.8128461956977844, "rewards/confidence_uniqueness_reward": 0.9363534331321717, "rewards/format_reward": 0.9907986044883728, "rewards/frontier_aurc_reward": -0.0012421533348970116, "rewards/frontier_ece_reward": 0.012260660342872143, "rewards/frontier_entropy_batch_reward": -0.32509719729423525, "signal/accgated_coverage_0/centered_abs_mean": 0.08290913850069045, "signal/accgated_coverage_0/group_std_mean": 0.11078527420759202, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.008290913514792919, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.008290913514792919, "signal/accgated_coverage_1/centered_abs_mean": 0.08290913850069045, "signal/accgated_coverage_1/group_std_mean": 0.11078527420759202, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.008290913514792919, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.008290913514792919, "signal/accgated_coverage_10/centered_abs_mean": 0.08290913850069045, "signal/accgated_coverage_10/group_std_mean": 0.11078527420759202, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.008290913514792919, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.008290913514792919, "signal/accgated_coverage_15/centered_abs_mean": 0.08290913850069045, "signal/accgated_coverage_15/group_std_mean": 0.11078527420759202, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.008290913514792919, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.008290913514792919, "signal/accgated_coverage_20/centered_abs_mean": 0.08290913850069045, "signal/accgated_coverage_20/group_std_mean": 0.11078527420759202, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.008290913514792919, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.008290913514792919, "signal/accgated_coverage_25/centered_abs_mean": 0.08290913850069045, "signal/accgated_coverage_25/group_std_mean": 0.11078527420759202, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.008290913514792919, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.008290913514792919, "signal/accgated_coverage_5/centered_abs_mean": 0.08290913850069045, "signal/accgated_coverage_5/group_std_mean": 0.11078527420759202, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.008290913514792919, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.008290913514792919, "signal/accuracy_reward/centered_abs_mean": 0.16545681655406952, "signal/accuracy_reward/group_std_mean": 0.22034453451633454, "signal/accuracy_reward/group_zero_std_frac": 0.37222222685813905, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08272840827703476, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08272840827703476, "signal/advantage_abs_mean": 0.0883379727602005, "signal/advantage_pre_scale_abs_mean": 0.0883379727602005, "signal/advantage_pre_scale_std": 0.14179351627826692, "signal/advantage_std": 0.14179351627826692, "signal/brier_reward/centered_abs_mean": 0.1315285176038742, "signal/brier_reward/group_std_mean": 0.1732201546430588, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013152851909399032, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013152851909399032, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.030239152908325195, "signal/confidence_uniqueness_reward/group_std_mean": 0.048340915143489836, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0030239153653383254, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0030239153653383254, "signal/format_reward/centered_abs_mean": 0.01531032994389534, "signal/format_reward/group_std_mean": 0.03024934194982052, "signal/format_reward/group_zero_std_frac": 0.8694444417953491, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00765516497194767, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00765516497194767, "signal/frontier_aurc_reward/centered_abs_mean": 0.0012988673988729715, "signal/frontier_aurc_reward/group_std_mean": 0.002020396827720106, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.6235843395406847e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.6235843395406847e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.02591995447874069, "signal/frontier_ece_reward/group_std_mean": 0.037436506152153014, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002591995522379875, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002591995522379875, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.343667733669281, "signal/frontier_entropy_batch_reward/group_std_mean": 0.41231095790863037, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.034366774559021, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.034366774559021, "step": 120 }, { "calibration/aurc": 0.16554464244255415, "calibration/batch_distribution_entropy": 0.9212177521245579, "calibration/buffer_distribution_entropy": 0.9138018599484876, "calibration/confidence_entropy": 0.4598320839958225, "calibration/coverage@0%": 0.031985780423280416, "calibration/coverage@1%": 0.031985780423280416, "calibration/coverage@10%": 0.3589368386243386, "calibration/coverage@15%": 0.4688489466467215, "calibration/coverage@20%": 0.5399296898806061, "calibration/coverage@25%": 0.8059401316518464, "calibration/coverage@30%": 0.8851748836532867, "calibration/coverage@5%": 0.30297619047619045, "calibration/ece": 0.11906561022394982, "calibration/mean_confidence": 0.6288702607416632, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006597222222222232, "completions/max_length": 3522.8, "completions/max_terminated_length": 3522.8, "completions/mean_length": 714.220751953125, "completions/mean_terminated_length": 718.9944458007812, "completions/min_length": 0.0, "completions/min_terminated_length": 212.6, "epoch": 0.2999962500468744, "grad_norm": 0.0005939038819633424, "learning_rate": 2.5e-06, "loss": -0.0071, "num_tokens": 267173472.0, "reward": 0.9884174346923829, "reward_std": 0.11844182759523392, "rewards/accgated_coverage_0": 0.01884926073253155, "rewards/accgated_coverage_1": 0.01884926073253155, "rewards/accgated_coverage_10": 0.01884926073253155, "rewards/accgated_coverage_15": 0.01884926073253155, "rewards/accgated_coverage_20": 0.01884926073253155, "rewards/accgated_coverage_25": 0.01884926073253155, "rewards/accgated_coverage_5": 0.01884926073253155, "rewards/accuracy_reward": 0.6723090291023255, "rewards/brier_reward": 0.8303997039794921, "rewards/confidence_uniqueness_reward": 0.9367879986763, "rewards/format_reward": 0.9934027791023254, "rewards/frontier_aurc_reward": -0.0010956356301903725, "rewards/frontier_ece_reward": 0.012315450236201286, "rewards/frontier_entropy_batch_reward": -0.35569567084312437, "signal/accgated_coverage_0/centered_abs_mean": 0.07545997649431228, "signal/accgated_coverage_0/group_std_mean": 0.10163306593894958, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.00754599766805768, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.00754599766805768, "signal/accgated_coverage_1/centered_abs_mean": 0.07545997649431228, "signal/accgated_coverage_1/group_std_mean": 0.10163306593894958, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.00754599766805768, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.00754599766805768, "signal/accgated_coverage_10/centered_abs_mean": 0.07545997649431228, "signal/accgated_coverage_10/group_std_mean": 0.10163306593894958, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.00754599766805768, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.00754599766805768, "signal/accgated_coverage_15/centered_abs_mean": 0.07545997649431228, "signal/accgated_coverage_15/group_std_mean": 0.10163306593894958, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.00754599766805768, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.00754599766805768, "signal/accgated_coverage_20/centered_abs_mean": 0.07545997649431228, "signal/accgated_coverage_20/group_std_mean": 0.10163306593894958, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.00754599766805768, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.00754599766805768, "signal/accgated_coverage_25/centered_abs_mean": 0.07545997649431228, "signal/accgated_coverage_25/group_std_mean": 0.10163306593894958, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.00754599766805768, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.00754599766805768, "signal/accgated_coverage_5/centered_abs_mean": 0.07545997649431228, "signal/accgated_coverage_5/group_std_mean": 0.10163306593894958, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.00754599766805768, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.00754599766805768, "signal/accuracy_reward/centered_abs_mean": 0.16522895097732543, "signal/accuracy_reward/group_std_mean": 0.22105098962783815, "signal/accuracy_reward/group_zero_std_frac": 0.3583333373069763, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08261447548866271, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08261447548866271, "signal/advantage_abs_mean": 0.08631904572248458, "signal/advantage_pre_scale_abs_mean": 0.08631904572248458, "signal/advantage_pre_scale_std": 0.13653431832790375, "signal/advantage_std": 0.13653431832790375, "signal/brier_reward/centered_abs_mean": 0.12054249793291091, "signal/brier_reward/group_std_mean": 0.1602459281682968, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01205424964427948, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01205424964427948, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.027296838536858558, "signal/confidence_uniqueness_reward/group_std_mean": 0.04333715438842774, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0027296837884932756, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0027296837884932756, "signal/format_reward/centered_abs_mean": 0.011990017350763082, "signal/format_reward/group_std_mean": 0.02509169690310955, "signal/format_reward/group_zero_std_frac": 0.8888889074325561, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.005995008675381541, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.005995008675381541, "signal/frontier_aurc_reward/centered_abs_mean": 0.0012402797234244644, "signal/frontier_aurc_reward/group_std_mean": 0.0020731140859425066, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.5503496979363262e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.5503496979363262e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.023925036564469336, "signal/frontier_ece_reward/group_std_mean": 0.03440321609377861, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0023925038054585456, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0023925038054585456, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3451420783996582, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4122345566749573, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.002777777798473835, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.034514208883047105, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.034514208883047105, "step": 125 }, { "calibration/aurc": 0.21284186071868588, "calibration/batch_distribution_entropy": 0.9339286249438772, "calibration/buffer_distribution_entropy": 0.9148087399287421, "calibration/confidence_entropy": 0.45447642600743754, "calibration/coverage@0%": 0.03196024284568126, "calibration/coverage@1%": 0.03196024284568126, "calibration/coverage@10%": 0.21866415120586336, "calibration/coverage@15%": 0.34623181344678866, "calibration/coverage@20%": 0.439894685215483, "calibration/coverage@25%": 0.7419993547570419, "calibration/coverage@30%": 0.8557840171963754, "calibration/coverage@5%": 0.1316335258728029, "calibration/ece": 0.1146119336676791, "calibration/mean_confidence": 0.5895418090310146, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013107638888888884, "completions/max_length": 3611.6, "completions/max_terminated_length": 3611.6, "completions/mean_length": 745.3184204101562, "completions/mean_terminated_length": 755.2761962890625, "completions/min_length": 0.0, "completions/min_terminated_length": 211.4, "epoch": 0.3119961000487494, "grad_norm": 0.0004161697579547763, "learning_rate": 2.349397590361446e-06, "loss": -0.0135, "num_tokens": 278884340.0, "reward": 0.9715276241302491, "reward_std": 0.1289304807782173, "rewards/accgated_coverage_0": 0.020680619217455388, "rewards/accgated_coverage_1": 0.020680619217455388, "rewards/accgated_coverage_10": 0.020680619217455388, "rewards/accgated_coverage_15": 0.020680619217455388, "rewards/accgated_coverage_20": 0.020680619217455388, "rewards/accgated_coverage_25": 0.020680619217455388, "rewards/accgated_coverage_5": 0.020680619217455388, "rewards/accuracy_reward": 0.6413194537162781, "rewards/brier_reward": 0.8110544085502625, "rewards/confidence_uniqueness_reward": 0.9331743836402893, "rewards/format_reward": 0.98671875, "rewards/frontier_aurc_reward": -0.0013186614960432053, "rewards/frontier_ece_reward": 0.010826228372752666, "rewards/frontier_entropy_batch_reward": -0.3245693564414978, "signal/accgated_coverage_0/centered_abs_mean": 0.0793161392211914, "signal/accgated_coverage_0/group_std_mean": 0.10691076219081878, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.007931614108383656, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.007931614108383656, "signal/accgated_coverage_1/centered_abs_mean": 0.0793161392211914, "signal/accgated_coverage_1/group_std_mean": 0.10691076219081878, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.007931614108383656, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.007931614108383656, "signal/accgated_coverage_10/centered_abs_mean": 0.0793161392211914, "signal/accgated_coverage_10/group_std_mean": 0.10691076219081878, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.007931614108383656, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.007931614108383656, "signal/accgated_coverage_15/centered_abs_mean": 0.0793161392211914, "signal/accgated_coverage_15/group_std_mean": 0.10691076219081878, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.007931614108383656, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.007931614108383656, "signal/accgated_coverage_20/centered_abs_mean": 0.0793161392211914, "signal/accgated_coverage_20/group_std_mean": 0.10691076219081878, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.007931614108383656, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.007931614108383656, "signal/accgated_coverage_25/centered_abs_mean": 0.0793161392211914, "signal/accgated_coverage_25/group_std_mean": 0.10691076219081878, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.007931614108383656, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.007931614108383656, "signal/accgated_coverage_5/centered_abs_mean": 0.0793161392211914, "signal/accgated_coverage_5/group_std_mean": 0.10691076219081878, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.007931614108383656, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.007931614108383656, "signal/accuracy_reward/centered_abs_mean": 0.18987630307674408, "signal/accuracy_reward/group_std_mean": 0.2467512845993042, "signal/accuracy_reward/group_zero_std_frac": 0.30555556416511537, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09493815153837204, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09493815153837204, "signal/advantage_abs_mean": 0.09621814787387847, "signal/advantage_pre_scale_abs_mean": 0.09621814787387847, "signal/advantage_pre_scale_std": 0.15054749250411986, "signal/advantage_std": 0.15054749250411986, "signal/brier_reward/centered_abs_mean": 0.1319481372833252, "signal/brier_reward/group_std_mean": 0.17350344955921174, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013194814324378967, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013194814324378967, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.034258627146482465, "signal/confidence_uniqueness_reward/group_std_mean": 0.052195188403129575, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0034258626867085694, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0034258626867085694, "signal/format_reward/centered_abs_mean": 0.021001519076526164, "signal/format_reward/group_std_mean": 0.03613746054470539, "signal/format_reward/group_zero_std_frac": 0.8583333373069764, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.010500759538263082, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.010500759538263082, "signal/frontier_aurc_reward/centered_abs_mean": 0.001310308533720672, "signal/frontier_aurc_reward/group_std_mean": 0.00207103060092777, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.637885670788819e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.637885670788819e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.023271889239549638, "signal/frontier_ece_reward/group_std_mean": 0.033554903045296666, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0023271888960152863, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0023271888960152863, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33475651144981383, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4035548448562622, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03347565159201622, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03347565159201622, "step": 130 }, { "calibration/aurc": 0.18618817491166895, "calibration/batch_distribution_entropy": 0.9298071065522094, "calibration/buffer_distribution_entropy": 0.9178997148527944, "calibration/confidence_entropy": 0.4370655181611058, "calibration/coverage@0%": 0.12626696858865133, "calibration/coverage@1%": 0.15455687624036638, "calibration/coverage@10%": 0.3530733727917713, "calibration/coverage@15%": 0.42093601050412605, "calibration/coverage@20%": 0.5813107921886217, "calibration/coverage@25%": 0.6828284484205971, "calibration/coverage@30%": 0.7786590645203598, "calibration/coverage@5%": 0.28417016779709464, "calibration/ece": 0.14236897055875175, "calibration/mean_confidence": 0.6090604705723687, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009114583333333348, "completions/max_length": 3660.6, "completions/max_terminated_length": 3660.6, "completions/mean_length": 734.3580810546875, "completions/mean_terminated_length": 741.1240966796875, "completions/min_length": 0.0, "completions/min_terminated_length": 203.4, "epoch": 0.32399595005062437, "grad_norm": 0.00042264023795723915, "learning_rate": 2.1987951807228917e-06, "loss": -0.01, "num_tokens": 290437169.0, "reward": 0.9915942430496216, "reward_std": 0.11871129423379898, "rewards/accgated_coverage_0": 0.02424356509000063, "rewards/accgated_coverage_1": 0.02424356509000063, "rewards/accgated_coverage_10": 0.02424356509000063, "rewards/accgated_coverage_15": 0.02424356509000063, "rewards/accgated_coverage_20": 0.02424356509000063, "rewards/accgated_coverage_25": 0.02424356509000063, "rewards/accgated_coverage_5": 0.02424356509000063, "rewards/accuracy_reward": 0.66953125, "rewards/brier_reward": 0.8216690421104431, "rewards/confidence_uniqueness_reward": 0.935891056060791, "rewards/format_reward": 0.9908854246139527, "rewards/frontier_aurc_reward": -0.001132953108754009, "rewards/frontier_ece_reward": 0.011347188241779804, "rewards/frontier_entropy_batch_reward": -0.32461191415786744, "signal/accgated_coverage_0/centered_abs_mean": 0.08618276119232178, "signal/accgated_coverage_0/group_std_mean": 0.11529500484466552, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.008618275728076696, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.008618275728076696, "signal/accgated_coverage_1/centered_abs_mean": 0.08618276119232178, "signal/accgated_coverage_1/group_std_mean": 0.11529500484466552, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.008618275728076696, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.008618275728076696, "signal/accgated_coverage_10/centered_abs_mean": 0.08618276119232178, "signal/accgated_coverage_10/group_std_mean": 0.11529500484466552, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.008618275728076696, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.008618275728076696, "signal/accgated_coverage_15/centered_abs_mean": 0.08618276119232178, "signal/accgated_coverage_15/group_std_mean": 0.11529500484466552, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.008618275728076696, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.008618275728076696, "signal/accgated_coverage_20/centered_abs_mean": 0.08618276119232178, "signal/accgated_coverage_20/group_std_mean": 0.11529500484466552, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.008618275728076696, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.008618275728076696, "signal/accgated_coverage_25/centered_abs_mean": 0.08618276119232178, "signal/accgated_coverage_25/group_std_mean": 0.11529500484466552, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.008618275728076696, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.008618275728076696, "signal/accgated_coverage_5/centered_abs_mean": 0.08618276119232178, "signal/accgated_coverage_5/group_std_mean": 0.11529500484466552, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.008618275728076696, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.008618275728076696, "signal/accuracy_reward/centered_abs_mean": 0.17056748867034913, "signal/accuracy_reward/group_std_mean": 0.22679646909236909, "signal/accuracy_reward/group_zero_std_frac": 0.35277777910232544, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08528374433517456, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08528374433517456, "signal/advantage_abs_mean": 0.0854735866189003, "signal/advantage_pre_scale_abs_mean": 0.0854735866189003, "signal/advantage_pre_scale_std": 0.13904949724674226, "signal/advantage_std": 0.13904949724674226, "signal/brier_reward/centered_abs_mean": 0.12689192742109298, "signal/brier_reward/group_std_mean": 0.16868027150630951, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012689193338155746, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012689193338155746, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.030863118171691895, "signal/confidence_uniqueness_reward/group_std_mean": 0.05242802649736404, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0030863119289278986, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0030863119289278986, "signal/format_reward/centered_abs_mean": 0.01650933176279068, "signal/format_reward/group_std_mean": 0.03543390221893787, "signal/format_reward/group_zero_std_frac": 0.8388888835906982, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00825466588139534, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00825466588139534, "signal/frontier_aurc_reward/centered_abs_mean": 0.0012018611654639245, "signal/frontier_aurc_reward/group_std_mean": 0.0018514725845307112, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.502326558693312e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.502326558693312e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.023784752935171127, "signal/frontier_ece_reward/group_std_mean": 0.03346829637885094, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00237847538664937, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00237847538664937, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3354430079460144, "signal/frontier_entropy_batch_reward/group_std_mean": 0.40641710758209226, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.033544300496578215, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033544300496578215, "step": 135 }, { "calibration/aurc": 0.10472164908366191, "calibration/batch_distribution_entropy": 0.9028221156209602, "calibration/buffer_distribution_entropy": 0.9232906072427312, "calibration/confidence_entropy": 0.420103138724377, "calibration/coverage@0%": 0.07498175326675494, "calibration/coverage@1%": 0.14625834901143578, "calibration/coverage@10%": 0.6354837792768773, "calibration/coverage@15%": 0.7214461153664524, "calibration/coverage@20%": 0.8011192019352944, "calibration/coverage@25%": 0.8792893660204146, "calibration/coverage@30%": 0.9623549569746952, "calibration/coverage@5%": 0.4253273435119567, "calibration/ece": 0.09601834271891607, "calibration/mean_confidence": 0.6272844677653694, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013194444444444463, "completions/max_length": 3399.4, "completions/max_terminated_length": 3399.4, "completions/mean_length": 726.5508911132813, "completions/mean_terminated_length": 736.2290771484375, "completions/min_length": 0.0, "completions/min_terminated_length": 229.0, "epoch": 0.33599580005249935, "grad_norm": 0.0003059864102397114, "learning_rate": 2.0481927710843377e-06, "loss": -0.0134, "num_tokens": 301911259.0, "reward": 0.9859672069549561, "reward_std": 0.12308523058891296, "rewards/accgated_coverage_0": 0.02728597857058048, "rewards/accgated_coverage_1": 0.02728597857058048, "rewards/accgated_coverage_10": 0.02728597857058048, "rewards/accgated_coverage_15": 0.02728597857058048, "rewards/accgated_coverage_20": 0.02728597857058048, "rewards/accgated_coverage_25": 0.025706437602639198, "rewards/accgated_coverage_5": 0.02728597857058048, "rewards/accuracy_reward": 0.6684027671813965, "rewards/brier_reward": 0.8261925935745239, "rewards/confidence_uniqueness_reward": 0.9272140622138977, "rewards/format_reward": 0.98671875, "rewards/frontier_aurc_reward": -0.0011251185205765069, "rewards/frontier_ece_reward": 0.011587263457477093, "rewards/frontier_entropy_batch_reward": -0.3702110886573792, "signal/accgated_coverage_0/centered_abs_mean": 0.07848915457725525, "signal/accgated_coverage_0/group_std_mean": 0.1055976927280426, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.007848915364593267, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.007848915364593267, "signal/accgated_coverage_1/centered_abs_mean": 0.07848915457725525, "signal/accgated_coverage_1/group_std_mean": 0.1055976927280426, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.007848915364593267, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.007848915364593267, "signal/accgated_coverage_10/centered_abs_mean": 0.07848915457725525, "signal/accgated_coverage_10/group_std_mean": 0.1055976927280426, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.007848915364593267, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.007848915364593267, "signal/accgated_coverage_15/centered_abs_mean": 0.07848915457725525, "signal/accgated_coverage_15/group_std_mean": 0.1055976927280426, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.007848915364593267, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.007848915364593267, "signal/accgated_coverage_20/centered_abs_mean": 0.07848915457725525, "signal/accgated_coverage_20/group_std_mean": 0.1055976927280426, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.007848915364593267, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.007848915364593267, "signal/accgated_coverage_25/centered_abs_mean": 0.06995586454868316, "signal/accgated_coverage_25/group_std_mean": 0.0945655107498169, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.0069955865852534774, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.0069955865852534774, "signal/accgated_coverage_5/centered_abs_mean": 0.07848915457725525, "signal/accgated_coverage_5/group_std_mean": 0.1055976927280426, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.007848915364593267, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.007848915364593267, "signal/accuracy_reward/centered_abs_mean": 0.1587456613779068, "signal/accuracy_reward/group_std_mean": 0.21405453681945802, "signal/accuracy_reward/group_zero_std_frac": 0.37500000596046446, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0793728306889534, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0793728306889534, "signal/advantage_abs_mean": 0.08728917539119721, "signal/advantage_pre_scale_abs_mean": 0.08728917539119721, "signal/advantage_pre_scale_std": 0.14750308096408843, "signal/advantage_std": 0.14750308096408843, "signal/brier_reward/centered_abs_mean": 0.1274958610534668, "signal/brier_reward/group_std_mean": 0.16804315745830536, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012749586440622806, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012749586440622806, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03858770914375782, "signal/confidence_uniqueness_reward/group_std_mean": 0.06330646127462387, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003858770988881588, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003858770988881588, "signal/format_reward/centered_abs_mean": 0.022553168796002866, "signal/format_reward/group_std_mean": 0.044466794654726985, "signal/format_reward/group_zero_std_frac": 0.8083333492279052, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.011276584398001433, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.011276584398001433, "signal/frontier_aurc_reward/centered_abs_mean": 0.0013644436025060714, "signal/frontier_aurc_reward/group_std_mean": 0.002245521126314998, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.7055545686162078e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.7055545686162078e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.022616703435778616, "signal/frontier_ece_reward/group_std_mean": 0.03189887069165707, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002261670376174152, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002261670376174152, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33584593534469603, "signal/frontier_entropy_batch_reward/group_std_mean": 0.407626211643219, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0335845947265625, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0335845947265625, "step": 140 }, { "calibration/aurc": 0.14606667298627013, "calibration/batch_distribution_entropy": 0.964540454743454, "calibration/buffer_distribution_entropy": 0.9351459849109356, "calibration/confidence_entropy": 0.45604566754714054, "calibration/coverage@0%": 0.06742711008004583, "calibration/coverage@1%": 0.08357294341337915, "calibration/coverage@10%": 0.4891351777947418, "calibration/coverage@15%": 0.60078833663897, "calibration/coverage@20%": 0.6978462014212975, "calibration/coverage@25%": 0.7915759806586891, "calibration/coverage@30%": 0.8538409493047343, "calibration/coverage@5%": 0.31591807622855894, "calibration/ece": 0.153603334809968, "calibration/mean_confidence": 0.5470196795355173, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009374999999999977, "completions/max_length": 3416.8, "completions/max_terminated_length": 3416.8, "completions/mean_length": 714.3645141601562, "completions/mean_terminated_length": 721.1787475585937, "completions/min_length": 0.0, "completions/min_terminated_length": 234.4, "epoch": 0.34799565005437433, "grad_norm": 0.0002952328941319138, "learning_rate": 1.8975903614457832e-06, "loss": -0.0099, "num_tokens": 313205346.0, "reward": 0.9970819234848023, "reward_std": 0.11172881275415421, "rewards/accgated_coverage_0": 0.015393723733723164, "rewards/accgated_coverage_1": 0.015393723733723164, "rewards/accgated_coverage_10": 0.015393723733723164, "rewards/accgated_coverage_15": 0.015393723733723164, "rewards/accgated_coverage_20": 0.01588248461484909, "rewards/accgated_coverage_25": 0.020709260366857052, "rewards/accgated_coverage_5": 0.015393723733723164, "rewards/accuracy_reward": 0.6959201455116272, "rewards/brier_reward": 0.8239673137664795, "rewards/confidence_uniqueness_reward": 0.9359864592552185, "rewards/format_reward": 0.9906249880790711, "rewards/frontier_aurc_reward": -0.0010717614088207484, "rewards/frontier_ece_reward": 0.0071692907251417635, "rewards/frontier_entropy_batch_reward": -0.34245588183403014, "signal/accgated_coverage_0/centered_abs_mean": 0.08704878985881806, "signal/accgated_coverage_0/group_std_mean": 0.11625861674547196, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.008704879134893418, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.008704879134893418, "signal/accgated_coverage_1/centered_abs_mean": 0.08704878985881806, "signal/accgated_coverage_1/group_std_mean": 0.11625861674547196, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.008704879134893418, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.008704879134893418, "signal/accgated_coverage_10/centered_abs_mean": 0.08704878985881806, "signal/accgated_coverage_10/group_std_mean": 0.11625861674547196, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.008704879134893418, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.008704879134893418, "signal/accgated_coverage_15/centered_abs_mean": 0.08704878985881806, "signal/accgated_coverage_15/group_std_mean": 0.11625861674547196, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.008704879134893418, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.008704879134893418, "signal/accgated_coverage_20/centered_abs_mean": 0.0803851142525673, "signal/accgated_coverage_20/group_std_mean": 0.10777752846479416, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.008038511220365762, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.008038511220365762, "signal/accgated_coverage_25/centered_abs_mean": 0.04402193687856197, "signal/accgated_coverage_25/group_std_mean": 0.05933395996689796, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.004402193846181035, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.004402193846181035, "signal/accgated_coverage_5/centered_abs_mean": 0.08704878985881806, "signal/accgated_coverage_5/group_std_mean": 0.11625861674547196, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.008704879134893418, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.008704879134893418, "signal/accuracy_reward/centered_abs_mean": 0.14832357168197632, "signal/accuracy_reward/group_std_mean": 0.20283843576908112, "signal/accuracy_reward/group_zero_std_frac": 0.3944444477558136, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07416178584098816, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07416178584098816, "signal/advantage_abs_mean": 0.0811191275715828, "signal/advantage_pre_scale_abs_mean": 0.0811191275715828, "signal/advantage_pre_scale_std": 0.13390319645404816, "signal/advantage_std": 0.13390319645404816, "signal/brier_reward/centered_abs_mean": 0.12230742424726486, "signal/brier_reward/group_std_mean": 0.16233535408973693, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012230742909014224, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012230742909014224, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03038500025868416, "signal/confidence_uniqueness_reward/group_std_mean": 0.04836958795785904, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00303850001655519, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00303850001655519, "signal/format_reward/centered_abs_mean": 0.016276041604578496, "signal/format_reward/group_std_mean": 0.03133721351623535, "signal/format_reward/group_zero_std_frac": 0.8694444417953491, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.008138020802289248, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.008138020802289248, "signal/frontier_aurc_reward/centered_abs_mean": 0.001202726038172841, "signal/frontier_aurc_reward/group_std_mean": 0.002039621490985155, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.5034074567665812e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.5034074567665812e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.018114964291453362, "signal/frontier_ece_reward/group_std_mean": 0.025317597761750223, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0018114965176209807, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0018114965176209807, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.34314724802970886, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4112194418907166, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.034314725548028946, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.034314725548028946, "step": 145 }, { "calibration/aurc": 0.1502133761388545, "calibration/batch_distribution_entropy": 0.9393864979003841, "calibration/buffer_distribution_entropy": 0.9458424403735097, "calibration/confidence_entropy": 0.44937923792070256, "calibration/coverage@0%": 0.09901511084757157, "calibration/coverage@1%": 0.16974560199691088, "calibration/coverage@10%": 0.5041888110998058, "calibration/coverage@15%": 0.5857952791984206, "calibration/coverage@20%": 0.6541716232657138, "calibration/coverage@25%": 0.7284662615672963, "calibration/coverage@30%": 0.81490193267646, "calibration/coverage@5%": 0.3860676982142951, "calibration/ece": 0.1404094438977152, "calibration/mean_confidence": 0.5738136615962789, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010069444444444442, "completions/max_length": 3607.2, "completions/max_terminated_length": 3607.2, "completions/mean_length": 781.7636474609375, "completions/mean_terminated_length": 789.7067504882813, "completions/min_length": 0.0, "completions/min_terminated_length": 237.8, "epoch": 0.3599955000562493, "grad_norm": 0.00030073069501668215, "learning_rate": 1.7469879518072292e-06, "loss": -0.0096, "num_tokens": 325321599.0, "reward": 0.9943321108818054, "reward_std": 0.12259746938943863, "rewards/accgated_coverage_0": 0.021769443340599538, "rewards/accgated_coverage_1": 0.021769443340599538, "rewards/accgated_coverage_10": 0.021769443340599538, "rewards/accgated_coverage_15": 0.021933466009795666, "rewards/accgated_coverage_20": 0.021304438635706902, "rewards/accgated_coverage_25": 0.04014414809644222, "rewards/accgated_coverage_5": 0.021769443340599538, "rewards/accuracy_reward": 0.6783854126930237, "rewards/brier_reward": 0.8334152817726135, "rewards/confidence_uniqueness_reward": 0.9337161183357239, "rewards/format_reward": 0.9897569417953491, "rewards/frontier_aurc_reward": -0.0011517940787598492, "rewards/frontier_ece_reward": 0.00666016167961061, "rewards/frontier_entropy_batch_reward": -0.3414980471134186, "signal/accgated_coverage_0/centered_abs_mean": 0.08002641052007675, "signal/accgated_coverage_0/group_std_mean": 0.10786933153867721, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.008002641331404447, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.008002641331404447, "signal/accgated_coverage_1/centered_abs_mean": 0.08002641052007675, "signal/accgated_coverage_1/group_std_mean": 0.10786933153867721, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.008002641331404447, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.008002641331404447, "signal/accgated_coverage_10/centered_abs_mean": 0.08002641052007675, "signal/accgated_coverage_10/group_std_mean": 0.10786933153867721, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.008002641331404447, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.008002641331404447, "signal/accgated_coverage_15/centered_abs_mean": 0.07641349881887435, "signal/accgated_coverage_15/group_std_mean": 0.10338671654462814, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.007641350477933883, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.007641350477933883, "signal/accgated_coverage_20/centered_abs_mean": 0.04975445568561554, "signal/accgated_coverage_20/group_std_mean": 0.06870571970939636, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.004975445568561554, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.004975445568561554, "signal/accgated_coverage_25/centered_abs_mean": 0.03025592640042305, "signal/accgated_coverage_25/group_std_mean": 0.03869783394038677, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.003025592723861337, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.003025592723861337, "signal/accgated_coverage_5/centered_abs_mean": 0.08002641052007675, "signal/accgated_coverage_5/group_std_mean": 0.10786933153867721, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.008002641331404447, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.008002641331404447, "signal/accuracy_reward/centered_abs_mean": 0.17113172709941865, "signal/accuracy_reward/group_std_mean": 0.2234587401151657, "signal/accuracy_reward/group_zero_std_frac": 0.3666666686534882, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08556586354970933, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08556586354970933, "signal/advantage_abs_mean": 0.0890174686908722, "signal/advantage_pre_scale_abs_mean": 0.0890174686908722, "signal/advantage_pre_scale_std": 0.14541475772857665, "signal/advantage_std": 0.14541475772857665, "signal/brier_reward/centered_abs_mean": 0.12310948371887206, "signal/brier_reward/group_std_mean": 0.1644404262304306, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012310948595404625, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012310948595404625, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03251851163804531, "signal/confidence_uniqueness_reward/group_std_mean": 0.053677086532115934, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0032518512103706597, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0032518512103706597, "signal/format_reward/centered_abs_mean": 0.01814236119389534, "signal/format_reward/group_std_mean": 0.03656843528151512, "signal/format_reward/group_zero_std_frac": 0.8416666626930237, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00907118059694767, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00907118059694767, "signal/frontier_aurc_reward/centered_abs_mean": 0.0015089602209627628, "signal/frontier_aurc_reward/group_std_mean": 0.0025732704903930425, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.8862002616515382e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.8862002616515382e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.014805944077670575, "signal/frontier_ece_reward/group_std_mean": 0.019813685864210128, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0014805944636464119, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0014805944636464119, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32585235834121706, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3952793776988983, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03258523568511009, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03258523568511009, "step": 150 }, { "epoch": 0.3599955000562493, "eval_calibration/aurc": 0.16763761841652935, "eval_calibration/batch_distribution_entropy": 0.8990557588565634, "eval_calibration/buffer_distribution_entropy": 0.9513564885877245, "eval_calibration/confidence_entropy": 0.4349930828583795, "eval_calibration/coverage@0%": 0.22465277777777778, "eval_calibration/coverage@1%": 0.22465277777777778, "eval_calibration/coverage@10%": 0.34965277777777776, "eval_calibration/coverage@15%": 0.5496527777777778, "eval_calibration/coverage@20%": 0.7135416666666666, "eval_calibration/coverage@25%": 0.8722222222222222, "eval_calibration/coverage@30%": 0.9406249999999999, "eval_calibration/coverage@5%": 0.33402777777777776, "eval_calibration/ece": 0.22026682861937405, "eval_calibration/mean_confidence": 0.5920676732220265, "eval_completions/clipped_ratio": 0.006944444444444438, "eval_completions/max_length": 2337.6666666666665, "eval_completions/max_terminated_length": 2337.6666666666665, "eval_completions/mean_length": 748.0566202799479, "eval_completions/mean_terminated_length": 753.2368876139323, "eval_completions/min_length": 101.66666666666667, "eval_completions/min_terminated_length": 279.5, "eval_loss": 0.0, "eval_num_tokens": 325321599.0, "eval_reward": 0.916528731584549, "eval_reward_std": 0.2074477275212606, "eval_rewards/accgated_coverage_0": 0.017190332369258005, "eval_rewards/accgated_coverage_1": 0.017190332369258005, "eval_rewards/accgated_coverage_10": 0.017190332369258005, "eval_rewards/accgated_coverage_15": 0.01790264039300382, "eval_rewards/accgated_coverage_20": 0.01782215495283405, "eval_rewards/accgated_coverage_25": 0.04417319502681494, "eval_rewards/accgated_coverage_5": 0.017190332369258005, "eval_rewards/accuracy_reward": 0.6701388855775198, "eval_rewards/brier_reward": 0.8126361072063446, "eval_rewards/confidence_uniqueness_reward": 0.8869705498218536, "eval_rewards/format_reward": 0.9904513855775198, "eval_rewards/frontier_aurc_reward": -0.0019800245742468783, "eval_rewards/frontier_ece_reward": 0.004768568052289386, "eval_rewards/frontier_entropy_batch_reward": -0.9904513855775198, "eval_runtime": 200.2087, "eval_samples_per_second": 4.995, "eval_signal/accgated_coverage_0/centered_abs_mean": 0.1304701641201973, "eval_signal/accgated_coverage_0/group_std_mean": 0.18650069584449133, "eval_signal/accgated_coverage_0/group_zero_std_frac": 0.0, "eval_signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.013047016536196073, "eval_signal/accgated_coverage_0/weight": 0.10000000149011612, "eval_signal/accgated_coverage_0/weighted_centered_abs_mean": 0.013047016536196073, "eval_signal/accgated_coverage_1/centered_abs_mean": 0.1304701641201973, "eval_signal/accgated_coverage_1/group_std_mean": 0.18650069584449133, "eval_signal/accgated_coverage_1/group_zero_std_frac": 0.0, "eval_signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.013047016536196073, "eval_signal/accgated_coverage_1/weight": 0.10000000149011612, "eval_signal/accgated_coverage_1/weighted_centered_abs_mean": 0.013047016536196073, "eval_signal/accgated_coverage_10/centered_abs_mean": 0.1304701641201973, "eval_signal/accgated_coverage_10/group_std_mean": 0.18650069584449133, "eval_signal/accgated_coverage_10/group_zero_std_frac": 0.0, "eval_signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.013047016536196073, "eval_signal/accgated_coverage_10/weight": 0.10000000149011612, "eval_signal/accgated_coverage_10/weighted_centered_abs_mean": 0.013047016536196073, "eval_signal/accgated_coverage_15/centered_abs_mean": 0.10964768255750339, "eval_signal/accgated_coverage_15/group_std_mean": 0.15953792383273444, "eval_signal/accgated_coverage_15/group_zero_std_frac": 0.0, "eval_signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.010964768783499798, "eval_signal/accgated_coverage_15/weight": 0.10000000149011612, "eval_signal/accgated_coverage_15/weighted_centered_abs_mean": 0.010964768783499798, "eval_signal/accgated_coverage_20/centered_abs_mean": 0.057131893932819366, "eval_signal/accgated_coverage_20/group_std_mean": 0.08694148808717728, "eval_signal/accgated_coverage_20/group_zero_std_frac": 0.0, "eval_signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0057131896416346235, "eval_signal/accgated_coverage_20/weight": 0.10000000149011612, "eval_signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0057131896416346235, "eval_signal/accgated_coverage_25/centered_abs_mean": 0.05311373248696327, "eval_signal/accgated_coverage_25/group_std_mean": 0.06559204993148644, "eval_signal/accgated_coverage_25/group_zero_std_frac": 0.0, "eval_signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.005311373310784499, "eval_signal/accgated_coverage_25/weight": 0.10000000149011612, "eval_signal/accgated_coverage_25/weighted_centered_abs_mean": 0.005311373310784499, "eval_signal/accgated_coverage_5/centered_abs_mean": 0.1304701641201973, "eval_signal/accgated_coverage_5/group_std_mean": 0.18650069584449133, "eval_signal/accgated_coverage_5/group_zero_std_frac": 0.0, "eval_signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.013047016536196073, "eval_signal/accgated_coverage_5/weight": 0.10000000149011612, "eval_signal/accgated_coverage_5/weighted_centered_abs_mean": 0.013047016536196073, "eval_signal/accuracy_reward/centered_abs_mean": 0.4261067758003871, "eval_signal/accuracy_reward/group_std_mean": 0.4669964363177617, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.21305338790019354, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.21305338790019354, "eval_signal/advantage_abs_mean": 0.17535198479890823, "eval_signal/advantage_pre_scale_abs_mean": 0.17535198479890823, "eval_signal/advantage_pre_scale_std": 0.20822140822807947, "eval_signal/advantage_std": 0.20822140822807947, "eval_signal/brier_reward/centered_abs_mean": 0.1969471424818039, "eval_signal/brier_reward/group_std_mean": 0.25743385901053745, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019694714496533077, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.019694714496533077, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.05258619785308838, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.07910802401602268, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005258619707698624, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005258619707698624, "eval_signal/format_reward/centered_abs_mean": 0.018174913246184587, "eval_signal/format_reward/group_std_mean": 0.045046874321997166, "eval_signal/format_reward/group_zero_std_frac": 0.7777778009573618, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.009087456623092294, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.009087456623092294, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0033884466198893883, "eval_signal/frontier_aurc_reward/group_std_mean": 0.008018870799181363, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.2355582081654575e-05, "eval_signal/frontier_aurc_reward/weight": 0.012500000186264515, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.2355582081654575e-05, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.017925683719416458, "eval_signal/frontier_ece_reward/group_std_mean": 0.02367624578376611, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0017925684223882854, "eval_signal/frontier_ece_reward/weight": 0.10000000149011612, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0017925684223882854, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.018174913246184587, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.045046874321997166, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.7777778009573618, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0018174914099896948, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0018174914099896948, "eval_steps_per_second": 0.03, "step": 150 }, { "calibration/aurc": 0.13959037762186857, "calibration/batch_distribution_entropy": 0.9571234233060555, "calibration/buffer_distribution_entropy": 0.9544348820115424, "calibration/confidence_entropy": 0.47877571143833497, "calibration/coverage@0%": 0.04338049940871343, "calibration/coverage@1%": 0.04338049940871343, "calibration/coverage@10%": 0.5388304666397344, "calibration/coverage@15%": 0.660080229138832, "calibration/coverage@20%": 0.7443303760611322, "calibration/coverage@25%": 0.8514756742717319, "calibration/coverage@30%": 0.9032110781512721, "calibration/coverage@5%": 0.23531781607810212, "calibration/ece": 0.147932460641434, "calibration/mean_confidence": 0.5904228748409285, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006944444444444442, "completions/max_length": 3571.6, "completions/max_terminated_length": 3571.6, "completions/mean_length": 734.038525390625, "completions/mean_terminated_length": 739.1866455078125, "completions/min_length": 0.0, "completions/min_terminated_length": 247.6, "epoch": 0.3719953500581243, "grad_norm": 0.00034539776970632374, "learning_rate": 1.5963855421686747e-06, "loss": -0.0059, "num_tokens": 336885435.0, "reward": 1.0162927865982057, "reward_std": 0.117668616771698, "rewards/accgated_coverage_0": 0.008730353973805904, "rewards/accgated_coverage_1": 0.008730353973805904, "rewards/accgated_coverage_10": 0.008730353973805904, "rewards/accgated_coverage_15": 0.012687907461076975, "rewards/accgated_coverage_20": 0.023076852411031724, "rewards/accgated_coverage_25": 0.06760275661945343, "rewards/accgated_coverage_5": 0.008730353973805904, "rewards/accuracy_reward": 0.72109375, "rewards/brier_reward": 0.8338244438171387, "rewards/confidence_uniqueness_reward": 0.9384856462478638, "rewards/format_reward": 0.9928819537162781, "rewards/frontier_aurc_reward": -0.0010542726842686534, "rewards/frontier_ece_reward": 0.0029879425885155795, "rewards/frontier_entropy_batch_reward": -0.3204062461853027, "signal/accgated_coverage_0/centered_abs_mean": 0.09136468917131424, "signal/accgated_coverage_0/group_std_mean": 0.12313442379236221, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.009136468544602394, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.009136468544602394, "signal/accgated_coverage_1/centered_abs_mean": 0.09136468917131424, "signal/accgated_coverage_1/group_std_mean": 0.12313442379236221, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.009136468544602394, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.009136468544602394, "signal/accgated_coverage_10/centered_abs_mean": 0.09136468917131424, "signal/accgated_coverage_10/group_std_mean": 0.12313442379236221, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.009136468544602394, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.009136468544602394, "signal/accgated_coverage_15/centered_abs_mean": 0.06817464828491211, "signal/accgated_coverage_15/group_std_mean": 0.09295286387205123, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.006817464809864759, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.006817464809864759, "signal/accgated_coverage_20/centered_abs_mean": 0.03550227433443069, "signal/accgated_coverage_20/group_std_mean": 0.047702183574438096, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.003550227452069521, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.003550227452069521, "signal/accgated_coverage_25/centered_abs_mean": 0.03964427635073662, "signal/accgated_coverage_25/group_std_mean": 0.05114280804991722, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.003964427672326565, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.003964427672326565, "signal/accgated_coverage_5/centered_abs_mean": 0.09136468917131424, "signal/accgated_coverage_5/group_std_mean": 0.12313442379236221, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.009136468544602394, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.009136468544602394, "signal/accuracy_reward/centered_abs_mean": 0.17602538764476777, "signal/accuracy_reward/group_std_mean": 0.22985662817955016, "signal/accuracy_reward/group_zero_std_frac": 0.35833333134651185, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08801269382238389, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08801269382238389, "signal/advantage_abs_mean": 0.08501027822494507, "signal/advantage_pre_scale_abs_mean": 0.08501027822494507, "signal/advantage_pre_scale_std": 0.13985675275325776, "signal/advantage_std": 0.13985675275325776, "signal/brier_reward/centered_abs_mean": 0.11954725980758667, "signal/brier_reward/group_std_mean": 0.15919876992702484, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01195472627878189, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01195472627878189, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0272883802652359, "signal/confidence_uniqueness_reward/group_std_mean": 0.04573171883821488, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0027288380078971386, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0027288380078971386, "signal/format_reward/centered_abs_mean": 0.01296657994389534, "signal/format_reward/group_std_mean": 0.02831004709005356, "signal/format_reward/group_zero_std_frac": 0.8694444417953491, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00648328997194767, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00648328997194767, "signal/frontier_aurc_reward/centered_abs_mean": 0.0015273221535608173, "signal/frontier_aurc_reward/group_std_mean": 0.00275028171017766, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.9091527065029368e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.9091527065029368e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.012682820670306683, "signal/frontier_ece_reward/group_std_mean": 0.016651974245905875, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0012682820903137325, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0012682820903137325, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3162439942359924, "signal/frontier_entropy_batch_reward/group_std_mean": 0.38905380964279174, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03162440098822117, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03162440098822117, "step": 155 }, { "calibration/aurc": 0.10743571870653561, "calibration/batch_distribution_entropy": 0.9115237818660422, "calibration/buffer_distribution_entropy": 0.9605668145637967, "calibration/confidence_entropy": 0.46404297224501123, "calibration/coverage@0%": 0.12863544602502677, "calibration/coverage@1%": 0.278052421405394, "calibration/coverage@10%": 0.6681111929140036, "calibration/coverage@15%": 0.7555172651477655, "calibration/coverage@20%": 0.8412781445165628, "calibration/coverage@25%": 0.8842789451144542, "calibration/coverage@30%": 0.9047619047619048, "calibration/coverage@5%": 0.5154270129062687, "calibration/ece": 0.14810043813093507, "calibration/mean_confidence": 0.6534314587185783, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010329861111111116, "completions/max_length": 3365.0, "completions/max_terminated_length": 3365.0, "completions/mean_length": 738.4152099609375, "completions/mean_terminated_length": 746.1383666992188, "completions/min_length": 0.0, "completions/min_terminated_length": 224.0, "epoch": 0.38399520005999926, "grad_norm": 0.00032688508508726954, "learning_rate": 1.4457831325301204e-06, "loss": -0.0093, "num_tokens": 348479274.0, "reward": 0.9841222167015076, "reward_std": 0.12128061801195145, "rewards/accgated_coverage_0": 0.015495671518146991, "rewards/accgated_coverage_1": 0.015495671518146991, "rewards/accgated_coverage_10": 0.015495671518146991, "rewards/accgated_coverage_15": 0.015093481354415416, "rewards/accgated_coverage_20": 0.024507426470518113, "rewards/accgated_coverage_25": 0.06457818299531937, "rewards/accgated_coverage_5": 0.015495671518146991, "rewards/accuracy_reward": 0.6630208373069764, "rewards/brier_reward": 0.8221668839454651, "rewards/confidence_uniqueness_reward": 0.9338045358657837, "rewards/format_reward": 0.9896701455116272, "rewards/frontier_aurc_reward": -0.0016030759084969758, "rewards/frontier_ece_reward": 0.0035916581749916078, "rewards/frontier_entropy_batch_reward": -0.3477570950984955, "signal/accgated_coverage_0/centered_abs_mean": 0.0703942283987999, "signal/accgated_coverage_0/group_std_mean": 0.09567773193120957, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.0070394231006503105, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.0070394231006503105, "signal/accgated_coverage_1/centered_abs_mean": 0.0703942283987999, "signal/accgated_coverage_1/group_std_mean": 0.09567773193120957, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.0070394231006503105, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.0070394231006503105, "signal/accgated_coverage_10/centered_abs_mean": 0.0703942283987999, "signal/accgated_coverage_10/group_std_mean": 0.09567773193120957, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.0070394231006503105, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.0070394231006503105, "signal/accgated_coverage_15/centered_abs_mean": 0.0434797465801239, "signal/accgated_coverage_15/group_std_mean": 0.06042725443840027, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.0043479747138917444, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.0043479747138917444, "signal/accgated_coverage_20/centered_abs_mean": 0.025829650834202765, "signal/accgated_coverage_20/group_std_mean": 0.03384415283799171, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0025829650927335023, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0025829650927335023, "signal/accgated_coverage_25/centered_abs_mean": 0.03724170736968517, "signal/accgated_coverage_25/group_std_mean": 0.04777343571186066, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.0037241708021610977, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.0037241708021610977, "signal/accgated_coverage_5/centered_abs_mean": 0.0703942283987999, "signal/accgated_coverage_5/group_std_mean": 0.09567773193120957, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.0070394231006503105, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.0070394231006503105, "signal/accuracy_reward/centered_abs_mean": 0.16209852695465088, "signal/accuracy_reward/group_std_mean": 0.21041282415390014, "signal/accuracy_reward/group_zero_std_frac": 0.4138888895511627, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08104926347732544, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08104926347732544, "signal/advantage_abs_mean": 0.09037692099809647, "signal/advantage_pre_scale_abs_mean": 0.09037692099809647, "signal/advantage_pre_scale_std": 0.14657594561576842, "signal/advantage_std": 0.14657594561576842, "signal/brier_reward/centered_abs_mean": 0.11801843196153641, "signal/brier_reward/group_std_mean": 0.1553761065006256, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011801843531429768, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011801843531429768, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03203802034258842, "signal/confidence_uniqueness_reward/group_std_mean": 0.051499532908201216, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003203802043572068, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003203802043572068, "signal/format_reward/centered_abs_mean": 0.01785481758415699, "signal/format_reward/group_std_mean": 0.03447670228779316, "signal/format_reward/group_zero_std_frac": 0.8555555582046509, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.008927408792078494, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.008927408792078494, "signal/frontier_aurc_reward/centered_abs_mean": 0.0017638769699260593, "signal/frontier_aurc_reward/group_std_mean": 0.0029752728529274463, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.2048461687518284e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.2048461687518284e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.011287710629403591, "signal/frontier_ece_reward/group_std_mean": 0.01489656399935484, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0011287711327895521, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0011287711327895521, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3267548501491547, "signal/frontier_entropy_batch_reward/group_std_mean": 0.39591810703277586, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03267548531293869, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03267548531293869, "step": 160 }, { "calibration/aurc": 0.13707453862890023, "calibration/batch_distribution_entropy": 0.9393998078072607, "calibration/buffer_distribution_entropy": 0.963943720408478, "calibration/confidence_entropy": 0.4556464069393563, "calibration/coverage@0%": 0.11347585474513056, "calibration/coverage@1%": 0.17646798072938252, "calibration/coverage@10%": 0.5324355832987981, "calibration/coverage@15%": 0.6414469246442879, "calibration/coverage@20%": 0.7144313147534189, "calibration/coverage@25%": 0.7613995614035087, "calibration/coverage@30%": 0.8631929824561404, "calibration/coverage@5%": 0.36714439494405304, "calibration/ece": 0.12891001371722027, "calibration/mean_confidence": 0.5747154789833697, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666675, "completions/max_length": 3487.2, "completions/max_terminated_length": 3487.2, "completions/mean_length": 753.5942016601563, "completions/mean_terminated_length": 761.6573120117188, "completions/min_length": 0.0, "completions/min_terminated_length": 244.2, "epoch": 0.39599505006187424, "grad_norm": 0.00033790257293730974, "learning_rate": 1.2951807228915664e-06, "loss": -0.0077, "num_tokens": 360299751.0, "reward": 0.9920899629592895, "reward_std": 0.12097463011741638, "rewards/accgated_coverage_0": 0.025350801181048155, "rewards/accgated_coverage_1": 0.025350801181048155, "rewards/accgated_coverage_10": 0.025350801181048155, "rewards/accgated_coverage_15": 0.023273496888577938, "rewards/accgated_coverage_20": 0.03261325098574162, "rewards/accgated_coverage_25": 0.07034600675106048, "rewards/accgated_coverage_5": 0.025350801181048155, "rewards/accuracy_reward": 0.6567708373069763, "rewards/brier_reward": 0.8295871496200562, "rewards/confidence_uniqueness_reward": 0.936906611919403, "rewards/format_reward": 0.9895833253860473, "rewards/frontier_aurc_reward": -0.001301741786301136, "rewards/frontier_ece_reward": 0.003387619974091649, "rewards/frontier_entropy_batch_reward": -0.30822600722312926, "signal/accgated_coverage_0/centered_abs_mean": 0.07112332507967949, "signal/accgated_coverage_0/group_std_mean": 0.09761943519115449, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.00711233289912343, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.00711233289912343, "signal/accgated_coverage_1/centered_abs_mean": 0.07112332507967949, "signal/accgated_coverage_1/group_std_mean": 0.09761943519115449, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.00711233289912343, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.00711233289912343, "signal/accgated_coverage_10/centered_abs_mean": 0.07112332507967949, "signal/accgated_coverage_10/group_std_mean": 0.09761943519115449, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.00711233289912343, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.00711233289912343, "signal/accgated_coverage_15/centered_abs_mean": 0.03819368332624436, "signal/accgated_coverage_15/group_std_mean": 0.0530736930668354, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.0038193685468286276, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.0038193685468286276, "signal/accgated_coverage_20/centered_abs_mean": 0.025878940895199775, "signal/accgated_coverage_20/group_std_mean": 0.03326713815331459, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.002587894257158041, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.002587894257158041, "signal/accgated_coverage_25/centered_abs_mean": 0.03837202824652195, "signal/accgated_coverage_25/group_std_mean": 0.05023747906088829, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.0038372030016034842, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.0038372030016034842, "signal/accgated_coverage_5/centered_abs_mean": 0.07112332507967949, "signal/accgated_coverage_5/group_std_mean": 0.09761943519115449, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.00711233289912343, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.00711233289912343, "signal/accuracy_reward/centered_abs_mean": 0.15069444477558136, "signal/accuracy_reward/group_std_mean": 0.20632360279560089, "signal/accuracy_reward/group_zero_std_frac": 0.37777777910232546, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07534722238779068, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07534722238779068, "signal/advantage_abs_mean": 0.08762808591127395, "signal/advantage_pre_scale_abs_mean": 0.08762808591127395, "signal/advantage_pre_scale_std": 0.14219435751438142, "signal/advantage_std": 0.14219435751438142, "signal/brier_reward/centered_abs_mean": 0.11296486258506774, "signal/brier_reward/group_std_mean": 0.1534811317920685, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011296486295759679, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011296486295759679, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03022078201174736, "signal/confidence_uniqueness_reward/group_std_mean": 0.050181590020656586, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003022078238427639, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003022078238427639, "signal/format_reward/centered_abs_mean": 0.017621527798473834, "signal/format_reward/group_std_mean": 0.03512752801179886, "signal/format_reward/group_zero_std_frac": 0.8472222328186035, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.008810763899236917, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.008810763899236917, "signal/frontier_aurc_reward/centered_abs_mean": 0.0014444491360336542, "signal/frontier_aurc_reward/group_std_mean": 0.002510636835359037, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.8055615328194108e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.8055615328194108e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.010616243071854114, "signal/frontier_ece_reward/group_std_mean": 0.014283826760947704, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010616243351250886, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010616243351250886, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32418252229690553, "signal/frontier_entropy_batch_reward/group_std_mean": 0.39595122933387755, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0324182540178299, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0324182540178299, "step": 165 }, { "calibration/aurc": 0.09379690694605691, "calibration/batch_distribution_entropy": 0.8875218762767598, "calibration/buffer_distribution_entropy": 0.9656728923508411, "calibration/confidence_entropy": 0.44091941057216244, "calibration/coverage@0%": 0.06576609847566676, "calibration/coverage@1%": 0.1662869318090001, "calibration/coverage@10%": 0.7042785981870985, "calibration/coverage@15%": 0.797162939172109, "calibration/coverage@20%": 0.8705423172727755, "calibration/coverage@25%": 0.9271389712655221, "calibration/coverage@30%": 0.96880830522678, "calibration/coverage@5%": 0.33866477844879533, "calibration/ece": 0.08406840250541019, "calibration/mean_confidence": 0.6761357102230366, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.007986111111111093, "completions/max_length": 3577.8, "completions/max_terminated_length": 3577.8, "completions/mean_length": 725.5506103515625, "completions/mean_terminated_length": 731.4747314453125, "completions/min_length": 0.0, "completions/min_terminated_length": 232.2, "epoch": 0.4079949000637492, "grad_norm": 0.0004053888551425189, "learning_rate": 1.1445783132530121e-06, "loss": -0.0075, "num_tokens": 371747278.0, "reward": 1.0126575469970702, "reward_std": 0.12015107423067092, "rewards/accgated_coverage_0": 0.017383670061826707, "rewards/accgated_coverage_1": 0.017383670061826707, "rewards/accgated_coverage_10": 0.017383670061826707, "rewards/accgated_coverage_15": 0.02135354969650507, "rewards/accgated_coverage_20": 0.04693642929196358, "rewards/accgated_coverage_25": 0.10021267533302307, "rewards/accgated_coverage_5": 0.017383670061826707, "rewards/accuracy_reward": 0.70546875, "rewards/brier_reward": 0.8442538380622864, "rewards/confidence_uniqueness_reward": 0.9332508206367492, "rewards/format_reward": 0.9918402791023254, "rewards/frontier_aurc_reward": -0.0012258694274351, "rewards/frontier_ece_reward": 0.002612181368749589, "rewards/frontier_entropy_batch_reward": -0.37797077298164367, "signal/accgated_coverage_0/centered_abs_mean": 0.07582537084817886, "signal/accgated_coverage_0/group_std_mean": 0.10270393788814544, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.007582537457346916, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.007582537457346916, "signal/accgated_coverage_1/centered_abs_mean": 0.07582537084817886, "signal/accgated_coverage_1/group_std_mean": 0.10270393788814544, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.007582537457346916, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.007582537457346916, "signal/accgated_coverage_10/centered_abs_mean": 0.07582537084817886, "signal/accgated_coverage_10/group_std_mean": 0.10270393788814544, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.007582537457346916, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.007582537457346916, "signal/accgated_coverage_15/centered_abs_mean": 0.03511426188051701, "signal/accgated_coverage_15/group_std_mean": 0.047978077083826065, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.0035114262253046038, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.0035114262253046038, "signal/accgated_coverage_20/centered_abs_mean": 0.030998488515615465, "signal/accgated_coverage_20/group_std_mean": 0.0399199478328228, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0030998490750789643, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0030998490750789643, "signal/accgated_coverage_25/centered_abs_mean": 0.05178140699863434, "signal/accgated_coverage_25/group_std_mean": 0.06730167269706726, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.005178140755742788, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.005178140755742788, "signal/accgated_coverage_5/centered_abs_mean": 0.07582537084817886, "signal/accgated_coverage_5/group_std_mean": 0.10270393788814544, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.007582537457346916, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.007582537457346916, "signal/accuracy_reward/centered_abs_mean": 0.155908203125, "signal/accuracy_reward/group_std_mean": 0.20903717577457429, "signal/accuracy_reward/group_zero_std_frac": 0.397222226858139, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0779541015625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0779541015625, "signal/advantage_abs_mean": 0.08711908012628555, "signal/advantage_pre_scale_abs_mean": 0.08711908012628555, "signal/advantage_pre_scale_std": 0.14394052624702453, "signal/advantage_std": 0.14394052624702453, "signal/brier_reward/centered_abs_mean": 0.11152398288249969, "signal/brier_reward/group_std_mean": 0.14993155002593994, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011152398772537708, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011152398772537708, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.029369105771183967, "signal/confidence_uniqueness_reward/group_std_mean": 0.04833545163273811, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0029369106981903315, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0029369106981903315, "signal/format_reward/centered_abs_mean": 0.01460503451526165, "signal/format_reward/group_std_mean": 0.030626020580530166, "signal/format_reward/group_zero_std_frac": 0.8611111283302307, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.007302517257630825, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.007302517257630825, "signal/frontier_aurc_reward/centered_abs_mean": 0.0016249929554760455, "signal/frontier_aurc_reward/group_std_mean": 0.0030195300932973623, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.0312412016210146e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.0312412016210146e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.010075355507433414, "signal/frontier_ece_reward/group_std_mean": 0.013354136049747467, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010075355181470512, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010075355181470512, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32258252501487733, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3932394325733185, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.002777777798473835, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03225825130939484, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03225825130939484, "step": 170 }, { "calibration/aurc": 0.11142001321196293, "calibration/batch_distribution_entropy": 0.9423204741141762, "calibration/buffer_distribution_entropy": 0.9661498449320355, "calibration/confidence_entropy": 0.45821246300454394, "calibration/coverage@0%": 0.1898169706104169, "calibration/coverage@1%": 0.2035106542946567, "calibration/coverage@10%": 0.533609767462017, "calibration/coverage@15%": 0.6553213812147145, "calibration/coverage@20%": 0.803748541020554, "calibration/coverage@25%": 0.9049741637390127, "calibration/coverage@30%": 0.9748223193534041, "calibration/coverage@5%": 0.33688175700535544, "calibration/ece": 0.12908904507599772, "calibration/mean_confidence": 0.5928169319155115, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012239583333333326, "completions/max_length": 3628.0, "completions/max_terminated_length": 3628.0, "completions/mean_length": 751.97587890625, "completions/mean_terminated_length": 761.2690673828125, "completions/min_length": 0.0, "completions/min_terminated_length": 224.6, "epoch": 0.4199947500656242, "grad_norm": 0.000337122764904052, "learning_rate": 9.93975903614458e-07, "loss": -0.0114, "num_tokens": 383518008.0, "reward": 1.006504440307617, "reward_std": 0.12649587243795396, "rewards/accgated_coverage_0": 0.022072068974375726, "rewards/accgated_coverage_1": 0.022072068974375726, "rewards/accgated_coverage_10": 0.02190903965383768, "rewards/accgated_coverage_15": 0.023819806054234505, "rewards/accgated_coverage_20": 0.04952913150191307, "rewards/accgated_coverage_25": 0.09890762567520142, "rewards/accgated_coverage_5": 0.022072068974375726, "rewards/accuracy_reward": 0.6897569417953491, "rewards/brier_reward": 0.8337709426879882, "rewards/confidence_uniqueness_reward": 0.9315677404403686, "rewards/format_reward": 0.9875868082046508, "rewards/frontier_aurc_reward": -0.0010118687408976258, "rewards/frontier_ece_reward": 0.00214645602973178, "rewards/frontier_entropy_batch_reward": -0.34941497445106506, "signal/accgated_coverage_0/centered_abs_mean": 0.08707907050848007, "signal/accgated_coverage_0/group_std_mean": 0.11673283874988556, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.00870790733024478, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.00870790733024478, "signal/accgated_coverage_1/centered_abs_mean": 0.08707907050848007, "signal/accgated_coverage_1/group_std_mean": 0.11673283874988556, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.00870790733024478, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.00870790733024478, "signal/accgated_coverage_10/centered_abs_mean": 0.0848253458738327, "signal/accgated_coverage_10/group_std_mean": 0.11379681825637818, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.008482534252107144, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.008482534252107144, "signal/accgated_coverage_15/centered_abs_mean": 0.03722478076815605, "signal/accgated_coverage_15/group_std_mean": 0.04985408037900925, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.003722478076815605, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.003722478076815605, "signal/accgated_coverage_20/centered_abs_mean": 0.03205550014972687, "signal/accgated_coverage_20/group_std_mean": 0.04064697846770286, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0032055501360446215, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0032055501360446215, "signal/accgated_coverage_25/centered_abs_mean": 0.04977491497993469, "signal/accgated_coverage_25/group_std_mean": 0.06415863260626793, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.004977491591125727, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.004977491591125727, "signal/accgated_coverage_5/centered_abs_mean": 0.08707907050848007, "signal/accgated_coverage_5/group_std_mean": 0.11673283874988556, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.00870790733024478, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.00870790733024478, "signal/accuracy_reward/centered_abs_mean": 0.17249348759651184, "signal/accuracy_reward/group_std_mean": 0.2279975652694702, "signal/accuracy_reward/group_zero_std_frac": 0.35555556416511536, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08624674379825592, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08624674379825592, "signal/advantage_abs_mean": 0.09142151474952698, "signal/advantage_pre_scale_abs_mean": 0.09142151474952698, "signal/advantage_pre_scale_std": 0.15212540030479432, "signal/advantage_std": 0.15212540030479432, "signal/brier_reward/centered_abs_mean": 0.12030397355556488, "signal/brier_reward/group_std_mean": 0.15930656492710113, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012030397728085517, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012030397728085517, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.035230952501297, "signal/confidence_uniqueness_reward/group_std_mean": 0.05694400668144226, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0035230953246355055, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0035230953246355055, "signal/format_reward/centered_abs_mean": 0.02116427943110466, "signal/format_reward/group_std_mean": 0.04019532725214958, "signal/format_reward/group_zero_std_frac": 0.8305555582046509, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01058213971555233, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01058213971555233, "signal/frontier_aurc_reward/centered_abs_mean": 0.0012590843951329588, "signal/frontier_aurc_reward/group_std_mean": 0.0022685666335746646, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.5738555521238596e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.5738555521238596e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.01081162467598915, "signal/frontier_ece_reward/group_std_mean": 0.014261576719582081, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001081162504851818, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001081162504851818, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32662315368652345, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3984943747520447, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03266231343150139, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03266231343150139, "step": 175 }, { "calibration/aurc": 0.08382240776518861, "calibration/batch_distribution_entropy": 0.9263015077586324, "calibration/buffer_distribution_entropy": 0.9654356750160844, "calibration/confidence_entropy": 0.44053076052748086, "calibration/coverage@0%": 0.11934866521125106, "calibration/coverage@1%": 0.17077675660910055, "calibration/coverage@10%": 0.6990107712808307, "calibration/coverage@15%": 0.807274710180921, "calibration/coverage@20%": 0.8950033786567826, "calibration/coverage@25%": 0.9454006875453829, "calibration/coverage@30%": 0.9649214659685864, "calibration/coverage@5%": 0.5336004888954191, "calibration/ece": 0.11662381439413354, "calibration/mean_confidence": 0.6396629208521569, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011458333333333348, "completions/max_length": 3815.8, "completions/max_terminated_length": 3815.8, "completions/mean_length": 724.7723876953125, "completions/mean_terminated_length": 733.14853515625, "completions/min_length": 0.0, "completions/min_terminated_length": 226.4, "epoch": 0.4319946000674992, "grad_norm": 0.00037727519520558417, "learning_rate": 8.433734939759036e-07, "loss": -0.0111, "num_tokens": 394967354.0, "reward": 1.0016642928123474, "reward_std": 0.12620580643415452, "rewards/accgated_coverage_0": 0.021729044057428835, "rewards/accgated_coverage_1": 0.021729044057428835, "rewards/accgated_coverage_10": 0.02150404118001461, "rewards/accgated_coverage_15": 0.02417885847389698, "rewards/accgated_coverage_20": 0.052081949263811114, "rewards/accgated_coverage_25": 0.10146590769290924, "rewards/accgated_coverage_5": 0.021729044057428835, "rewards/accuracy_reward": 0.6892361044883728, "rewards/brier_reward": 0.829409658908844, "rewards/confidence_uniqueness_reward": 0.9283636331558227, "rewards/format_reward": 0.9884548664093018, "rewards/frontier_aurc_reward": -0.0018609853694215416, "rewards/frontier_ece_reward": 0.0019440729709458537, "rewards/frontier_entropy_batch_reward": -0.39571504592895507, "signal/accgated_coverage_0/centered_abs_mean": 0.0820931151509285, "signal/accgated_coverage_0/group_std_mean": 0.11036419868469238, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.008209311775863171, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.008209311775863171, "signal/accgated_coverage_1/centered_abs_mean": 0.0820931151509285, "signal/accgated_coverage_1/group_std_mean": 0.11036419868469238, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.008209311775863171, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.008209311775863171, "signal/accgated_coverage_10/centered_abs_mean": 0.07484557554125786, "signal/accgated_coverage_10/group_std_mean": 0.1010680690407753, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.0074845578521490095, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.0074845578521490095, "signal/accgated_coverage_15/centered_abs_mean": 0.033568178117275235, "signal/accgated_coverage_15/group_std_mean": 0.044669998437166215, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.003356817737221718, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.003356817737221718, "signal/accgated_coverage_20/centered_abs_mean": 0.034712836146354675, "signal/accgated_coverage_20/group_std_mean": 0.044363278150558474, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0034712836146354674, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0034712836146354674, "signal/accgated_coverage_25/centered_abs_mean": 0.05585875362157822, "signal/accgated_coverage_25/group_std_mean": 0.07269425168633462, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.005585875362157822, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.005585875362157822, "signal/accgated_coverage_5/centered_abs_mean": 0.0820931151509285, "signal/accgated_coverage_5/group_std_mean": 0.11036419868469238, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.008209311775863171, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.008209311775863171, "signal/accuracy_reward/centered_abs_mean": 0.16368272602558137, "signal/accuracy_reward/group_std_mean": 0.21843498945236206, "signal/accuracy_reward/group_zero_std_frac": 0.3694444477558136, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08184136301279069, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08184136301279069, "signal/advantage_abs_mean": 0.09044176638126374, "signal/advantage_pre_scale_abs_mean": 0.09044176638126374, "signal/advantage_pre_scale_std": 0.15314349234104158, "signal/advantage_std": 0.15314349234104158, "signal/brier_reward/centered_abs_mean": 0.12139843702316284, "signal/brier_reward/group_std_mean": 0.162265807390213, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012139843590557576, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012139843590557576, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03472979925572872, "signal/confidence_uniqueness_reward/group_std_mean": 0.053919277340173724, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003472979832440615, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003472979832440615, "signal/format_reward/centered_abs_mean": 0.018885633535683156, "signal/format_reward/group_std_mean": 0.03502344973385334, "signal/format_reward/group_zero_std_frac": 0.8555555582046509, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.009442816767841578, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.009442816767841578, "signal/frontier_aurc_reward/centered_abs_mean": 0.002187176514416933, "signal/frontier_aurc_reward/group_std_mean": 0.004075382417067885, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.733970650297124e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.733970650297124e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.009765653498470783, "signal/frontier_ece_reward/group_std_mean": 0.012881954945623875, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.000976565305609256, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.000976565305609256, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33729991912841795, "signal/frontier_entropy_batch_reward/group_std_mean": 0.40356319546699526, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.033729993551969525, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033729993551969525, "step": 180 }, { "calibration/aurc": 0.16190140374322814, "calibration/batch_distribution_entropy": 0.937421210212176, "calibration/buffer_distribution_entropy": 0.9641604640658331, "calibration/confidence_entropy": 0.4606058652211222, "calibration/coverage@0%": 0.024603379429687745, "calibration/coverage@1%": 0.024603379429687745, "calibration/coverage@10%": 0.2731480414947021, "calibration/coverage@15%": 0.5704934176134204, "calibration/coverage@20%": 0.8450828196798226, "calibration/coverage@25%": 0.913627509584862, "calibration/coverage@30%": 0.9366492146596859, "calibration/coverage@5%": 0.15475558298909925, "calibration/ece": 0.16852490293275388, "calibration/mean_confidence": 0.5998936936106494, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008940972222222187, "completions/max_length": 3640.0, "completions/max_terminated_length": 3640.0, "completions/mean_length": 731.548876953125, "completions/mean_terminated_length": 738.14892578125, "completions/min_length": 0.0, "completions/min_terminated_length": 221.6, "epoch": 0.44399445006937416, "grad_norm": 0.00037856833660043776, "learning_rate": 6.927710843373495e-07, "loss": -0.0066, "num_tokens": 406484845.0, "reward": 1.000522756576538, "reward_std": 0.12429632395505905, "rewards/accgated_coverage_0": 0.02475869134068489, "rewards/accgated_coverage_1": 0.02475869134068489, "rewards/accgated_coverage_10": 0.024336008355021477, "rewards/accgated_coverage_15": 0.02571437545120716, "rewards/accgated_coverage_20": 0.05004611238837242, "rewards/accgated_coverage_25": 0.09355643838644027, "rewards/accgated_coverage_5": 0.024745855107903482, "rewards/accuracy_reward": 0.6691840171813965, "rewards/brier_reward": 0.8300941109657287, "rewards/confidence_uniqueness_reward": 0.9367785811424255, "rewards/format_reward": 0.9910590291023255, "rewards/frontier_aurc_reward": -0.0013151126448065042, "rewards/frontier_ece_reward": 0.0014089885400608182, "rewards/frontier_entropy_batch_reward": -0.33202074766159057, "signal/accgated_coverage_0/centered_abs_mean": 0.08384153693914413, "signal/accgated_coverage_0/group_std_mean": 0.111553256213665, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.008384153712540865, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.008384153712540865, "signal/accgated_coverage_1/centered_abs_mean": 0.08384153693914413, "signal/accgated_coverage_1/group_std_mean": 0.111553256213665, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.008384153712540865, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.008384153712540865, "signal/accgated_coverage_10/centered_abs_mean": 0.06902736574411392, "signal/accgated_coverage_10/group_std_mean": 0.09257243126630783, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.006902736704796552, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.006902736704796552, "signal/accgated_coverage_15/centered_abs_mean": 0.030930518358945846, "signal/accgated_coverage_15/group_std_mean": 0.040364190191030505, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.0030930519569665194, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.0030930519569665194, "signal/accgated_coverage_20/centered_abs_mean": 0.03231954351067543, "signal/accgated_coverage_20/group_std_mean": 0.041446197777986526, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0032319542951881886, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0032319542951881886, "signal/accgated_coverage_25/centered_abs_mean": 0.051111014932394026, "signal/accgated_coverage_25/group_std_mean": 0.06704937815666198, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.005111101549118757, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.005111101549118757, "signal/accgated_coverage_5/centered_abs_mean": 0.08381979912519455, "signal/accgated_coverage_5/group_std_mean": 0.1115255281329155, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.008381979819387198, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.008381979819387198, "signal/accuracy_reward/centered_abs_mean": 0.1699598550796509, "signal/accuracy_reward/group_std_mean": 0.22352631986141205, "signal/accuracy_reward/group_zero_std_frac": 0.3694444477558136, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08497992753982545, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08497992753982545, "signal/advantage_abs_mean": 0.09083193242549896, "signal/advantage_pre_scale_abs_mean": 0.09083193242549896, "signal/advantage_pre_scale_std": 0.14602271616458892, "signal/advantage_std": 0.14602271616458892, "signal/brier_reward/centered_abs_mean": 0.12183561623096466, "signal/brier_reward/group_std_mean": 0.1614099621772766, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01218356229364872, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01218356229364872, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.028073800355196, "signal/confidence_uniqueness_reward/group_std_mean": 0.046647604554891586, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0028073799796402453, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0028073799796402453, "signal/format_reward/centered_abs_mean": 0.014816623367369174, "signal/format_reward/group_std_mean": 0.030689219757914544, "signal/format_reward/group_zero_std_frac": 0.8611111044883728, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.007408311683684587, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.007408311683684587, "signal/frontier_aurc_reward/centered_abs_mean": 0.0015788348391652107, "signal/frontier_aurc_reward/group_std_mean": 0.0030495470855385065, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.9735435853363014e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.9735435853363014e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.010178772546350957, "signal/frontier_ece_reward/group_std_mean": 0.013282094523310662, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010178772499784827, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010178772499784827, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3281850337982178, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3985232710838318, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.032818502932786944, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032818502932786944, "step": 185 }, { "calibration/aurc": 0.14453371099003726, "calibration/batch_distribution_entropy": 0.9147580072034586, "calibration/buffer_distribution_entropy": 0.9639425978438201, "calibration/confidence_entropy": 0.4524898443515701, "calibration/coverage@0%": 0.09274777052981203, "calibration/coverage@1%": 0.12086448140514361, "calibration/coverage@10%": 0.37908342004865136, "calibration/coverage@15%": 0.5675797783755056, "calibration/coverage@20%": 0.713385961637515, "calibration/coverage@25%": 0.9190044798065162, "calibration/coverage@30%": 0.9780470800524934, "calibration/coverage@5%": 0.29242188738490793, "calibration/ece": 0.14082698722784545, "calibration/mean_confidence": 0.6366452158551367, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.007552083333333348, "completions/max_length": 3550.4, "completions/max_terminated_length": 3550.4, "completions/mean_length": 719.5197021484375, "completions/mean_terminated_length": 724.99267578125, "completions/min_length": 0.0, "completions/min_terminated_length": 219.2, "epoch": 0.45599430007124914, "grad_norm": 0.0003734733909368515, "learning_rate": 5.421686746987952e-07, "loss": -0.0063, "num_tokens": 417856656.0, "reward": 1.0197997450828553, "reward_std": 0.1204053670167923, "rewards/accgated_coverage_0": 0.01778254872187972, "rewards/accgated_coverage_1": 0.01778254872187972, "rewards/accgated_coverage_10": 0.0201628603041172, "rewards/accgated_coverage_15": 0.029619522020220758, "rewards/accgated_coverage_20": 0.06293513551354409, "rewards/accgated_coverage_25": 0.11633996367454529, "rewards/accgated_coverage_5": 0.017779755219817162, "rewards/accuracy_reward": 0.7078993082046509, "rewards/brier_reward": 0.8345678091049195, "rewards/confidence_uniqueness_reward": 0.9368413925170899, "rewards/format_reward": 0.9924479246139526, "rewards/frontier_aurc_reward": -0.0012165130581706762, "rewards/frontier_ece_reward": -0.00039820900419726966, "rewards/frontier_entropy_batch_reward": -0.35700035095214844, "signal/accgated_coverage_0/centered_abs_mean": 0.09048043787479401, "signal/accgated_coverage_0/group_std_mean": 0.12144993394613265, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.009048044122755528, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.009048044122755528, "signal/accgated_coverage_1/centered_abs_mean": 0.09048043787479401, "signal/accgated_coverage_1/group_std_mean": 0.12144993394613265, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.009048044122755528, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.009048044122755528, "signal/accgated_coverage_10/centered_abs_mean": 0.06684454083442688, "signal/accgated_coverage_10/group_std_mean": 0.09066204130649566, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.006684453692287207, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.006684453692287207, "signal/accgated_coverage_15/centered_abs_mean": 0.03273606859147549, "signal/accgated_coverage_15/group_std_mean": 0.04233010783791542, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.0032736069057136773, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.0032736069057136773, "signal/accgated_coverage_20/centered_abs_mean": 0.03768849298357964, "signal/accgated_coverage_20/group_std_mean": 0.04803970232605934, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0037688495591282844, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0037688495591282844, "signal/accgated_coverage_25/centered_abs_mean": 0.05939576998353004, "signal/accgated_coverage_25/group_std_mean": 0.07729223221540452, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.005939576961100102, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.005939576961100102, "signal/accgated_coverage_5/centered_abs_mean": 0.09042053371667862, "signal/accgated_coverage_5/group_std_mean": 0.12137292772531509, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.009042053669691085, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.009042053669691085, "signal/accuracy_reward/centered_abs_mean": 0.16521809995174408, "signal/accuracy_reward/group_std_mean": 0.21986591517925264, "signal/accuracy_reward/group_zero_std_frac": 0.3638888955116272, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08260904997587204, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08260904997587204, "signal/advantage_abs_mean": 0.08695598244667054, "signal/advantage_pre_scale_abs_mean": 0.08695598244667054, "signal/advantage_pre_scale_std": 0.14129080772399902, "signal/advantage_std": 0.14129080772399902, "signal/brier_reward/centered_abs_mean": 0.11943345665931701, "signal/brier_reward/group_std_mean": 0.15741896331310273, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011943346075713634, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011943346075713634, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.027682187035679817, "signal/confidence_uniqueness_reward/group_std_mean": 0.04504421055316925, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002768218796700239, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002768218796700239, "signal/format_reward/centered_abs_mean": 0.01363389752805233, "signal/format_reward/group_std_mean": 0.028098611906170846, "signal/format_reward/group_zero_std_frac": 0.8777777791023255, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.006816948764026165, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.006816948764026165, "signal/frontier_aurc_reward/centered_abs_mean": 0.0013316195458173753, "signal/frontier_aurc_reward/group_std_mean": 0.0022581091150641443, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.6645244613755494e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.6645244613755494e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.010017194785177708, "signal/frontier_ece_reward/group_std_mean": 0.012919113039970398, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010017195134423673, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010017195134423673, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3350070595741272, "signal/frontier_entropy_batch_reward/group_std_mean": 0.40499748587608336, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.033500705286860465, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033500705286860465, "step": 190 }, { "calibration/aurc": 0.14915417266796935, "calibration/batch_distribution_entropy": 0.9576361146093342, "calibration/buffer_distribution_entropy": 0.9639255010791874, "calibration/confidence_entropy": 0.4497387116426815, "calibration/coverage@0%": 0.045690067003181546, "calibration/coverage@1%": 0.10611787448981255, "calibration/coverage@10%": 0.3900509318187466, "calibration/coverage@15%": 0.4850762773690467, "calibration/coverage@20%": 0.7097961256800407, "calibration/coverage@25%": 0.8674547346009558, "calibration/coverage@30%": 0.9342891171376101, "calibration/coverage@5%": 0.2691564589599168, "calibration/ece": 0.1618369751691793, "calibration/mean_confidence": 0.5709995873050344, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013020833333333325, "completions/max_length": 3631.6, "completions/max_terminated_length": 3631.6, "completions/mean_length": 740.03681640625, "completions/mean_terminated_length": 749.9174682617188, "completions/min_length": 0.0, "completions/min_terminated_length": 210.4, "epoch": 0.46799415007312406, "grad_norm": 0.00039503860170952976, "learning_rate": 3.91566265060241e-07, "loss": -0.0122, "num_tokens": 429462744.0, "reward": 0.9920111894607544, "reward_std": 0.13194906413555146, "rewards/accgated_coverage_0": 0.019916841574013234, "rewards/accgated_coverage_1": 0.019916841574013234, "rewards/accgated_coverage_10": 0.0193513598293066, "rewards/accgated_coverage_15": 0.026907961070537566, "rewards/accgated_coverage_20": 0.053805211931467055, "rewards/accgated_coverage_25": 0.09571786969900131, "rewards/accgated_coverage_5": 0.019920169189572335, "rewards/accuracy_reward": 0.6619791626930237, "rewards/brier_reward": 0.815404748916626, "rewards/confidence_uniqueness_reward": 0.932914924621582, "rewards/format_reward": 0.9869791626930237, "rewards/frontier_aurc_reward": -0.0016872843028977514, "rewards/frontier_ece_reward": 0.0004102005223103333, "rewards/frontier_entropy_batch_reward": -0.32873486876487734, "signal/accgated_coverage_0/centered_abs_mean": 0.0784646600484848, "signal/accgated_coverage_0/group_std_mean": 0.10561068952083588, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.007846465986222028, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.007846465986222028, "signal/accgated_coverage_1/centered_abs_mean": 0.0784646600484848, "signal/accgated_coverage_1/group_std_mean": 0.10561068952083588, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.007846465986222028, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.007846465986222028, "signal/accgated_coverage_10/centered_abs_mean": 0.05457337722182274, "signal/accgated_coverage_10/group_std_mean": 0.07424464225769042, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.005457338038831949, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.005457338038831949, "signal/accgated_coverage_15/centered_abs_mean": 0.028437989950180053, "signal/accgated_coverage_15/group_std_mean": 0.036998636275529864, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.0028437990695238113, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.0028437990695238113, "signal/accgated_coverage_20/centered_abs_mean": 0.034603772684931755, "signal/accgated_coverage_20/group_std_mean": 0.044347959011793135, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0034603772219270468, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0034603772219270468, "signal/accgated_coverage_25/centered_abs_mean": 0.05456459298729897, "signal/accgated_coverage_25/group_std_mean": 0.07072983086109161, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.005456459615379572, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.005456459615379572, "signal/accgated_coverage_5/centered_abs_mean": 0.07840372771024703, "signal/accgated_coverage_5/group_std_mean": 0.10553145706653595, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.007840372994542122, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.007840372994542122, "signal/accuracy_reward/centered_abs_mean": 0.15810546576976775, "signal/accuracy_reward/group_std_mean": 0.21627101302146912, "signal/accuracy_reward/group_zero_std_frac": 0.36111111640930177, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07905273288488388, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07905273288488388, "signal/advantage_abs_mean": 0.09530858844518661, "signal/advantage_pre_scale_abs_mean": 0.09530858844518661, "signal/advantage_pre_scale_std": 0.15653879344463348, "signal/advantage_std": 0.15653879344463348, "signal/brier_reward/centered_abs_mean": 0.12907694429159164, "signal/brier_reward/group_std_mean": 0.1693983793258667, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012907694093883038, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012907694093883038, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.034757498651742935, "signal/confidence_uniqueness_reward/group_std_mean": 0.05477444678544998, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003475749958306551, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003475749958306551, "signal/format_reward/centered_abs_mean": 0.0216796875, "signal/format_reward/group_std_mean": 0.039159010723233224, "signal/format_reward/group_zero_std_frac": 0.8444444417953492, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01083984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01083984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.001960353879258037, "signal/frontier_aurc_reward/group_std_mean": 0.0034777455497533084, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.4504425164195708e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.4504425164195708e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.009798597171902656, "signal/frontier_ece_reward/group_std_mean": 0.012810107320547104, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009798598010092973, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009798598010092973, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32911902070045473, "signal/frontier_entropy_batch_reward/group_std_mean": 0.40096608400344846, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03291190341114998, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03291190341114998, "step": 195 }, { "calibration/aurc": 0.11813731305579875, "calibration/batch_distribution_entropy": 0.9250420952698768, "calibration/buffer_distribution_entropy": 0.9640337582651292, "calibration/confidence_entropy": 0.47253733829761435, "calibration/coverage@0%": 0.046461767915647115, "calibration/coverage@1%": 0.046461767915647115, "calibration/coverage@10%": 0.5523816210957633, "calibration/coverage@15%": 0.6547474620660765, "calibration/coverage@20%": 0.9090805045019799, "calibration/coverage@25%": 0.9398860165794065, "calibration/coverage@30%": 0.9665166884816754, "calibration/coverage@5%": 0.35775492558328736, "calibration/ece": 0.1328041371310841, "calibration/mean_confidence": 0.6450623844795544, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.007552083333333304, "completions/max_length": 3451.2, "completions/max_terminated_length": 3451.2, "completions/mean_length": 720.2349853515625, "completions/mean_terminated_length": 725.751220703125, "completions/min_length": 0.0, "completions/min_terminated_length": 213.0, "epoch": 0.47999400007499904, "grad_norm": 0.00045016928925178945, "learning_rate": 2.409638554216868e-07, "loss": -0.0057, "num_tokens": 440827659.0, "reward": 1.0121694087982178, "reward_std": 0.12391498684883118, "rewards/accgated_coverage_0": 0.017262194491922855, "rewards/accgated_coverage_1": 0.017262194491922855, "rewards/accgated_coverage_10": 0.01849367544054985, "rewards/accgated_coverage_15": 0.029358771443367005, "rewards/accgated_coverage_20": 0.06178856343030929, "rewards/accgated_coverage_25": 0.10975199192762375, "rewards/accgated_coverage_5": 0.017268973495811225, "rewards/accuracy_reward": 0.6933159708976746, "rewards/brier_reward": 0.8302036046981811, "rewards/confidence_uniqueness_reward": 0.93699049949646, "rewards/format_reward": 0.9922743082046509, "rewards/frontier_aurc_reward": -0.0020338458009064196, "rewards/frontier_ece_reward": -0.0003850290362606756, "rewards/frontier_entropy_batch_reward": -0.3439982354640961, "signal/accgated_coverage_0/centered_abs_mean": 0.07879135310649872, "signal/accgated_coverage_0/group_std_mean": 0.10471928268671035, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.007879135478287936, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.007879135478287936, "signal/accgated_coverage_1/centered_abs_mean": 0.07879135310649872, "signal/accgated_coverage_1/group_std_mean": 0.10471928268671035, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.007879135478287936, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.007879135478287936, "signal/accgated_coverage_10/centered_abs_mean": 0.05094265937805176, "signal/accgated_coverage_10/group_std_mean": 0.06902378126978874, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.005094265658408403, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.005094265658408403, "signal/accgated_coverage_15/centered_abs_mean": 0.028977422043681145, "signal/accgated_coverage_15/group_std_mean": 0.03736466318368912, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.002897742437198758, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.002897742437198758, "signal/accgated_coverage_20/centered_abs_mean": 0.038806602358818054, "signal/accgated_coverage_20/group_std_mean": 0.04972253888845444, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0038806602358818056, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0038806602358818056, "signal/accgated_coverage_25/centered_abs_mean": 0.06239664033055305, "signal/accgated_coverage_25/group_std_mean": 0.08014876991510392, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.00623966408893466, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.00623966408893466, "signal/accgated_coverage_5/centered_abs_mean": 0.0787682592868805, "signal/accgated_coverage_5/group_std_mean": 0.10468966215848922, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.007876825984567404, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.007876825984567404, "signal/accuracy_reward/centered_abs_mean": 0.1601508229970932, "signal/accuracy_reward/group_std_mean": 0.2075590342283249, "signal/accuracy_reward/group_zero_std_frac": 0.41666666865348817, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0800754114985466, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0800754114985466, "signal/advantage_abs_mean": 0.09237567484378814, "signal/advantage_pre_scale_abs_mean": 0.09237567484378814, "signal/advantage_pre_scale_std": 0.14858520925045013, "signal/advantage_std": 0.14858520925045013, "signal/brier_reward/centered_abs_mean": 0.11585159003734588, "signal/brier_reward/group_std_mean": 0.15270988047122955, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01158515941351652, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01158515941351652, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.027825209125876427, "signal/confidence_uniqueness_reward/group_std_mean": 0.04609650820493698, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002782521024346352, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002782521024346352, "signal/format_reward/centered_abs_mean": 0.01366644985973835, "signal/format_reward/group_std_mean": 0.029148318618535996, "signal/format_reward/group_zero_std_frac": 0.8666666746139526, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.006833224929869175, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.006833224929869175, "signal/frontier_aurc_reward/centered_abs_mean": 0.002534387307241559, "signal/frontier_aurc_reward/group_std_mean": 0.0048497423063963655, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.167984214087482e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.167984214087482e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.009504923969507218, "signal/frontier_ece_reward/group_std_mean": 0.012250457704067231, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009504924179054797, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009504924179054797, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32950940132141116, "signal/frontier_entropy_batch_reward/group_std_mean": 0.39927846789360044, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.032950940728187564, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032950940728187564, "step": 200 }, { "epoch": 0.47999400007499904, "eval_calibration/aurc": 0.1088882915120361, "eval_calibration/batch_distribution_entropy": 0.884755325790068, "eval_calibration/buffer_distribution_entropy": 0.9641450900696295, "eval_calibration/confidence_entropy": 0.4517086267080035, "eval_calibration/coverage@0%": 0.3333333333333333, "eval_calibration/coverage@1%": 0.3333333333333333, "eval_calibration/coverage@10%": 0.59375, "eval_calibration/coverage@15%": 0.6979166666666666, "eval_calibration/coverage@20%": 0.8489583333333334, "eval_calibration/coverage@25%": 0.9114583333333334, "eval_calibration/coverage@30%": 0.9791666666666666, "eval_calibration/coverage@5%": 0.34375, "eval_calibration/ece": 0.18822067071656992, "eval_calibration/mean_confidence": 0.6140182765846043, "eval_completions/clipped_ratio": 0.010416666666666666, "eval_completions/max_length": 2482.0, "eval_completions/max_terminated_length": 2482.0, "eval_completions/mean_length": 720.477793375651, "eval_completions/mean_terminated_length": 728.0352681477865, "eval_completions/min_length": 57.333333333333336, "eval_completions/min_terminated_length": 251.16666666666666, "eval_loss": 0.0, "eval_num_tokens": 440827659.0, "eval_reward": 0.9302136798699697, "eval_reward_std": 0.232889657219251, "eval_rewards/accgated_coverage_0": 0.021035971275220316, "eval_rewards/accgated_coverage_1": 0.021035971275220316, "eval_rewards/accgated_coverage_10": 0.020410844823345542, "eval_rewards/accgated_coverage_15": 0.029529539868235588, "eval_rewards/accgated_coverage_20": 0.05993118633826574, "eval_rewards/accgated_coverage_25": 0.10444261009494464, "eval_rewards/accgated_coverage_5": 0.021037622820585966, "eval_rewards/accuracy_reward": 0.6718750099341074, "eval_rewards/brier_reward": 0.8273646434148153, "eval_rewards/confidence_uniqueness_reward": 0.8833588063716888, "eval_rewards/format_reward": 0.988715281089147, "eval_rewards/frontier_aurc_reward": -0.0017877276889824618, "eval_rewards/frontier_ece_reward": -2.3216848300459485e-05, "eval_rewards/frontier_entropy_batch_reward": -0.988715281089147, "eval_runtime": 205.9088, "eval_samples_per_second": 4.857, "eval_signal/accgated_coverage_0/centered_abs_mean": 0.12559553111592928, "eval_signal/accgated_coverage_0/group_std_mean": 0.17820352067550024, "eval_signal/accgated_coverage_0/group_zero_std_frac": 0.0, "eval_signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.01255955391873916, "eval_signal/accgated_coverage_0/weight": 0.10000000149011612, "eval_signal/accgated_coverage_0/weighted_centered_abs_mean": 0.01255955391873916, "eval_signal/accgated_coverage_1/centered_abs_mean": 0.12559553111592928, "eval_signal/accgated_coverage_1/group_std_mean": 0.17820352067550024, "eval_signal/accgated_coverage_1/group_zero_std_frac": 0.0, "eval_signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.01255955391873916, "eval_signal/accgated_coverage_1/weight": 0.10000000149011612, "eval_signal/accgated_coverage_1/weighted_centered_abs_mean": 0.01255955391873916, "eval_signal/accgated_coverage_10/centered_abs_mean": 0.07524273234109084, "eval_signal/accgated_coverage_10/group_std_mean": 0.11235688626766205, "eval_signal/accgated_coverage_10/group_zero_std_frac": 0.0, "eval_signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.007524273591116071, "eval_signal/accgated_coverage_10/weight": 0.10000000149011612, "eval_signal/accgated_coverage_10/weighted_centered_abs_mean": 0.007524273591116071, "eval_signal/accgated_coverage_15/centered_abs_mean": 0.039298239474495254, "eval_signal/accgated_coverage_15/group_std_mean": 0.049893214677770935, "eval_signal/accgated_coverage_15/group_zero_std_frac": 0.0, "eval_signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.003929823908644418, "eval_signal/accgated_coverage_15/weight": 0.10000000149011612, "eval_signal/accgated_coverage_15/weighted_centered_abs_mean": 0.003929823908644418, "eval_signal/accgated_coverage_20/centered_abs_mean": 0.06626473863919576, "eval_signal/accgated_coverage_20/group_std_mean": 0.08086467534303665, "eval_signal/accgated_coverage_20/group_zero_std_frac": 0.0, "eval_signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0066264736621330185, "eval_signal/accgated_coverage_20/weight": 0.10000000149011612, "eval_signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0066264736621330185, "eval_signal/accgated_coverage_25/centered_abs_mean": 0.11843342582384746, "eval_signal/accgated_coverage_25/group_std_mean": 0.14078539858261743, "eval_signal/accgated_coverage_25/group_zero_std_frac": 0.0, "eval_signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.011843343110134205, "eval_signal/accgated_coverage_25/weight": 0.10000000149011612, "eval_signal/accgated_coverage_25/weighted_centered_abs_mean": 0.011843343110134205, "eval_signal/accgated_coverage_5/centered_abs_mean": 0.12558058152596155, "eval_signal/accgated_coverage_5/group_std_mean": 0.17818409701188406, "eval_signal/accgated_coverage_5/group_zero_std_frac": 0.0, "eval_signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.012558058214684328, "eval_signal/accgated_coverage_5/weight": 0.10000000149011612, "eval_signal/accgated_coverage_5/weighted_centered_abs_mean": 0.012558058214684328, "eval_signal/accuracy_reward/centered_abs_mean": 0.4229600677887599, "eval_signal/accuracy_reward/group_std_mean": 0.4658859223127365, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.21148003389437994, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.21148003389437994, "eval_signal/advantage_abs_mean": 0.19910976042350134, "eval_signal/advantage_pre_scale_abs_mean": 0.19910976042350134, "eval_signal/advantage_pre_scale_std": 0.23237022509177527, "eval_signal/advantage_std": 0.23237022509177527, "eval_signal/brier_reward/centered_abs_mean": 0.17643060783545175, "eval_signal/brier_reward/group_std_mean": 0.23687549928824106, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01764306053519249, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.01764306053519249, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0549784650405248, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.08964706336458524, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005497846674794952, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005497846674794952, "eval_signal/format_reward/centered_abs_mean": 0.021647135416666668, "eval_signal/format_reward/group_std_mean": 0.057857211058338485, "eval_signal/format_reward/group_zero_std_frac": 0.6944444676240286, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.010823567708333334, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.010823567708333334, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0030125895670304694, "eval_signal/frontier_aurc_reward/group_std_mean": 0.006865158909931779, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.765737180098464e-05, "eval_signal/frontier_aurc_reward/weight": 0.012500000186264515, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.765737180098464e-05, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.013848148131122192, "eval_signal/frontier_ece_reward/group_std_mean": 0.01800649023304383, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001384814813112219, "eval_signal/frontier_ece_reward/weight": 0.10000000149011612, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001384814813112219, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.021647135416666668, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.057857211058338485, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.6944444676240286, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0021647136115158596, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0021647136115158596, "eval_steps_per_second": 0.029, "step": 200 }, { "calibration/aurc": 0.16183124767173385, "calibration/batch_distribution_entropy": 0.903965862936228, "calibration/buffer_distribution_entropy": 0.9639420109475438, "calibration/confidence_entropy": 0.4274784320917583, "calibration/coverage@0%": 0.04289515021782449, "calibration/coverage@1%": 0.04289515021782449, "calibration/coverage@10%": 0.31463650649209207, "calibration/coverage@15%": 0.5353159409957227, "calibration/coverage@20%": 0.8229195440318524, "calibration/coverage@25%": 0.8926666046296227, "calibration/coverage@30%": 0.9345367077157132, "calibration/coverage@5%": 0.12416531949199486, "calibration/ece": 0.11350287813588537, "calibration/mean_confidence": 0.6439586636148722, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009114583333333325, "completions/max_length": 3574.4, "completions/max_terminated_length": 3574.4, "completions/mean_length": 734.712060546875, "completions/mean_terminated_length": 741.4294067382813, "completions/min_length": 0.0, "completions/min_terminated_length": 207.0, "epoch": 0.491993850076874, "grad_norm": 0.00032840637140907347, "learning_rate": 9.036144578313253e-08, "loss": -0.0075, "num_tokens": 452357494.0, "reward": 1.0280473828315735, "reward_std": 0.12178252339363098, "rewards/accgated_coverage_0": 0.010969918034970761, "rewards/accgated_coverage_1": 0.010969918034970761, "rewards/accgated_coverage_10": 0.016975909285247327, "rewards/accgated_coverage_15": 0.0357561755925417, "rewards/accgated_coverage_20": 0.07814027667045594, "rewards/accgated_coverage_25": 0.13613629937171937, "rewards/accgated_coverage_5": 0.010972013510763646, "rewards/accuracy_reward": 0.7239583373069763, "rewards/brier_reward": 0.8324974060058594, "rewards/confidence_uniqueness_reward": 0.9338359713554383, "rewards/format_reward": 0.9907986164093018, "rewards/frontier_aurc_reward": -0.0017422198783606292, "rewards/frontier_ece_reward": -0.0017038072284776718, "rewards/frontier_entropy_batch_reward": -0.35764376521110536, "signal/accgated_coverage_0/centered_abs_mean": 0.09229595065116883, "signal/accgated_coverage_0/group_std_mean": 0.12302704006433487, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.009229595586657525, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.009229595586657525, "signal/accgated_coverage_1/centered_abs_mean": 0.09229595065116883, "signal/accgated_coverage_1/group_std_mean": 0.12302704006433487, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.009229595586657525, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.009229595586657525, "signal/accgated_coverage_10/centered_abs_mean": 0.056336633116006854, "signal/accgated_coverage_10/group_std_mean": 0.07609608769416809, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.005633663292974234, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.005633663292974234, "signal/accgated_coverage_15/centered_abs_mean": 0.03357893191277981, "signal/accgated_coverage_15/group_std_mean": 0.042671628296375275, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.00335789336822927, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.00335789336822927, "signal/accgated_coverage_20/centered_abs_mean": 0.04358198344707489, "signal/accgated_coverage_20/group_std_mean": 0.0558698907494545, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.004358198214322329, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.004358198214322329, "signal/accgated_coverage_25/centered_abs_mean": 0.0666369266808033, "signal/accgated_coverage_25/group_std_mean": 0.0865581214427948, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.006663692649453878, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.006663692649453878, "signal/accgated_coverage_5/centered_abs_mean": 0.09228282570838928, "signal/accgated_coverage_5/group_std_mean": 0.12301015555858612, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.009228283166885376, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.009228283166885376, "signal/accuracy_reward/centered_abs_mean": 0.1586371511220932, "signal/accuracy_reward/group_std_mean": 0.21199294328689575, "signal/accuracy_reward/group_zero_std_frac": 0.3888888955116272, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0793185755610466, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0793185755610466, "signal/advantage_abs_mean": 0.08658735156059265, "signal/advantage_pre_scale_abs_mean": 0.08658735156059265, "signal/advantage_pre_scale_std": 0.145833295583725, "signal/advantage_std": 0.145833295583725, "signal/brier_reward/centered_abs_mean": 0.11947631686925889, "signal/brier_reward/group_std_mean": 0.15880028307437896, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0119476318359375, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.0119476318359375, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03140333443880081, "signal/confidence_uniqueness_reward/group_std_mean": 0.05210669934749603, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0031403335742652416, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0031403335742652416, "signal/format_reward/centered_abs_mean": 0.016384548787027598, "signal/format_reward/group_std_mean": 0.03408227376639843, "signal/format_reward/group_zero_std_frac": 0.8472222328186035, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.008192274393513799, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.008192274393513799, "signal/frontier_aurc_reward/centered_abs_mean": 0.0019668075372464957, "signal/frontier_aurc_reward/group_std_mean": 0.0035003958269953727, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.458509425196098e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.458509425196098e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.009487048350274562, "signal/frontier_ece_reward/group_std_mean": 0.012332708947360516, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009487048489972949, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009487048489972949, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33413779735565186, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4035483181476593, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.033413780853152275, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033413780853152275, "step": 205 }, { "calibration/aurc": 0.08513920603973812, "calibration/batch_distribution_entropy": 0.9141002430834693, "calibration/buffer_distribution_entropy": 0.9630652371634193, "calibration/confidence_entropy": 0.4457588800263559, "calibration/coverage@0%": 0.0602074695165456, "calibration/coverage@1%": 0.22643897517363867, "calibration/coverage@10%": 0.6904621196478132, "calibration/coverage@15%": 0.8459882447770486, "calibration/coverage@20%": 0.9142371635896023, "calibration/coverage@25%": 0.9719849361623955, "calibration/coverage@30%": 1.0, "calibration/coverage@5%": 0.4957522217913381, "calibration/ece": 0.11242408581979568, "calibration/mean_confidence": 0.6651221303123294, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006510416666666667, "completions/max_length": 3355.6666666666665, "completions/max_terminated_length": 3355.6666666666665, "completions/mean_length": 733.4699096679688, "completions/mean_terminated_length": 738.4192504882812, "completions/min_length": 0.0, "completions/min_terminated_length": 219.66666666666666, "epoch": 0.49919376007799904, "num_tokens": 459293286.0, "reward": 1.0133548974990845, "reward_std": 0.12018975863854091, "rewards/accgated_coverage_0": 0.014022265560925007, "rewards/accgated_coverage_1": 0.014022265560925007, "rewards/accgated_coverage_10": 0.01640247491498788, "rewards/accgated_coverage_15": 0.03245990971724192, "rewards/accgated_coverage_20": 0.06980260213216145, "rewards/accgated_coverage_25": 0.12002388884623845, "rewards/accgated_coverage_5": 0.014022964673737684, "rewards/accuracy_reward": 0.6903935273488363, "rewards/brier_reward": 0.8296072085698446, "rewards/confidence_uniqueness_reward": 0.9375643134117126, "rewards/format_reward": 0.9934896032015482, "rewards/frontier_aurc_reward": -0.001204290989941607, "rewards/frontier_ece_reward": -0.00047483538219239563, "rewards/frontier_entropy_batch_reward": -0.3331694006919861, "signal/accgated_coverage_0/centered_abs_mean": 0.08532695472240448, "signal/accgated_coverage_0/group_std_mean": 0.11210805177688599, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.008532696248342594, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.008532696248342594, "signal/accgated_coverage_1/centered_abs_mean": 0.08532695472240448, "signal/accgated_coverage_1/group_std_mean": 0.11210805177688599, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.008532696248342594, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.008532696248342594, "signal/accgated_coverage_10/centered_abs_mean": 0.05023227507869402, "signal/accgated_coverage_10/group_std_mean": 0.0667143886288007, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.005023227694133918, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.005023227694133918, "signal/accgated_coverage_15/centered_abs_mean": 0.03077574260532856, "signal/accgated_coverage_15/group_std_mean": 0.039111041774352394, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.0030775743070989847, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.0030775743070989847, "signal/accgated_coverage_20/centered_abs_mean": 0.03875031570593516, "signal/accgated_coverage_20/group_std_mean": 0.04919542744755745, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0038750318344682455, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0038750318344682455, "signal/accgated_coverage_25/centered_abs_mean": 0.05905377368132273, "signal/accgated_coverage_25/group_std_mean": 0.07615283379952113, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.005905377523352702, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.005905377523352702, "signal/accgated_coverage_5/centered_abs_mean": 0.08531387398640315, "signal/accgated_coverage_5/group_std_mean": 0.11209150652090709, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.008531387584904829, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.008531387584904829, "signal/accuracy_reward/centered_abs_mean": 0.162398728231589, "signal/accuracy_reward/group_std_mean": 0.21247334778308868, "signal/accuracy_reward/group_zero_std_frac": 0.4027777910232544, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0811993641157945, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0811993641157945, "signal/advantage_abs_mean": 0.08777189254760742, "signal/advantage_pre_scale_abs_mean": 0.08777189254760742, "signal/advantage_pre_scale_std": 0.14062808950742087, "signal/advantage_std": 0.14062808950742087, "signal/brier_reward/centered_abs_mean": 0.11930795510609944, "signal/brier_reward/group_std_mean": 0.15778929988543192, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011930795386433601, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011930795386433601, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.026650328810016315, "signal/confidence_uniqueness_reward/group_std_mean": 0.04459113130966822, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002665032943089803, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002665032943089803, "signal/format_reward/centered_abs_mean": 0.011962890314559141, "signal/format_reward/group_std_mean": 0.02671019857128461, "signal/format_reward/group_zero_std_frac": 0.875000019868215, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.005981445157279571, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.005981445157279571, "signal/frontier_aurc_reward/centered_abs_mean": 0.0013815810283025105, "signal/frontier_aurc_reward/group_std_mean": 0.0024687413048620024, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.72697618836537e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.72697618836537e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.010010889731347561, "signal/frontier_ece_reward/group_std_mean": 0.01289159276833137, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001001088957612713, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001001088957612713, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3178635636965434, "signal/frontier_entropy_batch_reward/group_std_mean": 0.389106810092926, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.031786357363065086, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.031786357363065086, "step": 208, "total_flos": 0.0, "train_loss": -0.009436613047280563, "train_runtime": 40838.7093, "train_samples_per_second": 0.367, "train_steps_per_second": 0.005 } ], "logging_steps": 5, "max_steps": 208, "num_input_tokens_seen": 459293286, "num_train_epochs": 1, "save_steps": 60, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 6, "trial_name": null, "trial_params": null }