{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.019230769230769, "eval_steps": 50, "global_step": 840, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "calibration/aurc": 0.5143116826276487, "calibration/batch_distribution_entropy": 0.24942287672258642, "calibration/batch_entropy_100bins": 0.34826538716122146, "calibration/batch_entropy_10bins": 0.24942287672258642, "calibration/batch_entropy_50bins": 0.3972500110134775, "calibration/batch_uniqueness": 0.5076220224596407, "calibration/confidence_entropy": 0.22319858203556509, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/distribution_entropy_10": 0.24942287672258642, "calibration/distribution_entropy_100": 0.34826538716122146, "calibration/ece": 0.4648402350505384, "calibration/mean_confidence": 0.9214651731076637, "calibration/unique_confidence_per_question": 0.03229166666666667, "calibration/unique_confidences": 12.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.018576388888888885, "completions/max_length": 4027.8, "completions/max_terminated_length": 4027.8, "completions/mean_length": 496.7038146972656, "completions/mean_terminated_length": 506.1095825195313, "completions/min_length": 0.0, "completions/min_terminated_length": 2.4, "epoch": 0.01201923076923077, "grad_norm": 0.004719023127108812, "learning_rate": 1.201923076923077e-07, "loss": 0.0071, "num_tokens": 8801292.0, "reward": 0.5796843767166138, "reward_std": 0.5260672450065613, "rewards/accuracy_reward": 0.24774305820465087, "rewards/brier_reward": 0.3010988771915436, "rewards/confidence_one_or_zero": 0.3295138835906982, "rewards/format_reward": 0.6105034708976745, "rewards/mean_confidence_reward": 0.8376270651817321, "sampling/batch_mean_priority_error": 0.41623627994107837, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 1.0, "sampling/error_ema_max": 0.08493557721376419, "sampling/error_ema_mean": 0.00029969110910315065, "sampling/priority_kl": 0.007231711782515049, "sampling/priority_scale": 1048575.9998779297, "sampling/prob_entropy": 10.301719474792481, "sampling/prob_max": 3.357546593178995e-05, "sampling/prob_min": 0.0, "sampling/prompt_draws_max": 1.0, "sampling/prompt_draws_mean": 0.007200000155717134, "sampling/prompt_draws_total": 216.0, "sampling/seen_fraction": 0.007200000155717134, "sampling/unseen_fraction": 0.9927999998442829, "signal/accuracy_reward/centered_abs_mean": 0.2978624105453491, "signal/accuracy_reward/group_std_mean": 0.3558677971363068, "signal/accuracy_reward/group_zero_std_frac": 0.11944444626569747, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.14893120527267456, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.14893120527267456, "signal/advantage_abs_mean": 0.4478647530078888, "signal/advantage_pre_scale_abs_mean": 0.4478647530078888, "signal/advantage_pre_scale_std": 0.535431969165802, "signal/advantage_std": 0.535431969165802, "signal/brier_reward/centered_abs_mean": 0.3062243342399597, "signal/brier_reward/group_std_mean": 0.3604122519493103, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.15311216711997985, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.15311216711997985, "signal/confidence_one_or_zero/centered_abs_mean": 0.4056097984313965, "signal/confidence_one_or_zero/group_std_mean": 0.4532551825046539, "signal/confidence_one_or_zero/group_zero_std_frac": 0.002777777798473835, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 4.056097986904206e-06, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 4.056097986904206e-06, "signal/format_reward/centered_abs_mean": 0.4284885048866272, "signal/format_reward/group_std_mean": 0.4680496335029602, "signal/format_reward/group_zero_std_frac": 0.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.2142442524433136, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.2142442524433136, "signal/mean_confidence_reward/centered_abs_mean": 0.1593179762363434, "signal/mean_confidence_reward/group_std_mean": 0.23351403772830964, "signal/mean_confidence_reward/group_zero_std_frac": 0.0, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 1.5931796951917932e-06, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 1.5931796951917932e-06, "step": 5 }, { "calibration/aurc": 0.6343878019492749, "calibration/batch_distribution_entropy": 0.2533414958520655, "calibration/batch_entropy_100bins": 0.3445832958692546, "calibration/batch_entropy_10bins": 0.2533414958520655, "calibration/batch_entropy_50bins": 0.39785691961510256, "calibration/batch_uniqueness": 0.5116630254293695, "calibration/confidence_entropy": 0.21199203076901546, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/distribution_entropy_10": 0.2533414958520655, "calibration/distribution_entropy_100": 0.3445832958692546, "calibration/ece": 0.590023294162618, "calibration/mean_confidence": 0.9236542957365893, "calibration/unique_confidence_per_question": 0.03020833333333333, "calibration/unique_confidences": 11.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.019704861111111117, "completions/max_length": 4034.8, "completions/max_terminated_length": 4034.8, "completions/mean_length": 521.3720520019531, "completions/mean_terminated_length": 531.8583312988281, "completions/min_length": 0.0, "completions/min_terminated_length": 10.2, "epoch": 0.02403846153846154, "grad_norm": 0.00470328563824296, "learning_rate": 2.403846153846154e-07, "loss": 0.0038, "num_tokens": 17889514.0, "reward": 0.5620967268943786, "reward_std": 0.5192024111747742, "rewards/accuracy_reward": 0.23385416269302367, "rewards/brier_reward": 0.28927443027496336, "rewards/confidence_one_or_zero": 0.33159722089767457, "rewards/format_reward": 0.6010416746139526, "rewards/mean_confidence_reward": 0.8290581583976746, "sampling/batch_mean_priority_error": 0.4233142476730391, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 1.0, "sampling/error_ema_max": 0.09038958102464675, "sampling/error_ema_mean": 0.0008069580886512995, "sampling/priority_kl": 0.019392204470932484, "sampling/priority_scale": 1048575.9998779297, "sampling/prob_entropy": 10.289560317993164, "sampling/prob_max": 3.398626940906979e-05, "sampling/prob_min": 0.0, "sampling/prompt_draws_max": 1.0, "sampling/prompt_draws_mean": 0.019200000166893005, "sampling/prompt_draws_total": 576.0, "sampling/seen_fraction": 0.019200000166893005, "sampling/unseen_fraction": 0.980799999833107, "signal/accuracy_reward/centered_abs_mean": 0.2861653625965118, "signal/accuracy_reward/group_std_mean": 0.3420232474803925, "signal/accuracy_reward/group_zero_std_frac": 0.1500000014901161, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1430826812982559, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.1430826812982559, "signal/advantage_abs_mean": 0.4444202482700348, "signal/advantage_pre_scale_abs_mean": 0.4444202482700348, "signal/advantage_pre_scale_std": 0.5322044849395752, "signal/advantage_std": 0.5322044849395752, "signal/brier_reward/centered_abs_mean": 0.299970406293869, "signal/brier_reward/group_std_mean": 0.3515132188796997, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1499852031469345, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.1499852031469345, "signal/confidence_one_or_zero/centered_abs_mean": 0.3983506977558136, "signal/confidence_one_or_zero/group_std_mean": 0.4477837383747101, "signal/confidence_one_or_zero/group_zero_std_frac": 0.00555555559694767, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 3.98350675823167e-06, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 3.98350675823167e-06, "signal/format_reward/centered_abs_mean": 0.43616536259651184, "signal/format_reward/group_std_mean": 0.472662889957428, "signal/format_reward/group_zero_std_frac": 0.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.21808268129825592, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.21808268129825592, "signal/mean_confidence_reward/centered_abs_mean": 0.16609691679477692, "signal/mean_confidence_reward/group_std_mean": 0.24436136484146118, "signal/mean_confidence_reward/group_zero_std_frac": 0.0, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 1.660969246586319e-06, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 1.660969246586319e-06, "step": 10 }, { "calibration/aurc": 0.5529991990141164, "calibration/batch_distribution_entropy": 0.25530890439239107, "calibration/batch_entropy_100bins": 0.3490627169603118, "calibration/batch_entropy_10bins": 0.25530890439239107, "calibration/batch_entropy_50bins": 0.40684862802386473, "calibration/batch_uniqueness": 0.507648785050892, "calibration/confidence_entropy": 0.21876854337479537, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/distribution_entropy_10": 0.25530890439239107, "calibration/distribution_entropy_100": 0.3490627169603118, "calibration/ece": 0.5166797503075292, "calibration/mean_confidence": 0.9174774627907002, "calibration/unique_confidence_per_question": 0.033854166666666664, "calibration/unique_confidences": 13.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02074652777777779, "completions/max_length": 4065.4, "completions/max_terminated_length": 4065.4, "completions/mean_length": 500.5936706542969, "completions/mean_terminated_length": 511.26482543945315, "completions/min_length": 0.0, "completions/min_terminated_length": 5.8, "epoch": 0.036057692307692304, "grad_norm": 0.00489252433180809, "learning_rate": 3.6057692307692306e-07, "loss": 0.0001, "num_tokens": 26764513.0, "reward": 0.6016612768173217, "reward_std": 0.5342199921607971, "rewards/accuracy_reward": 0.25651041865348817, "rewards/brier_reward": 0.3121532380580902, "rewards/confidence_one_or_zero": 0.33567708134651186, "rewards/format_reward": 0.6346354007720947, "rewards/mean_confidence_reward": 0.8399278521537781, "sampling/batch_mean_priority_error": 0.4096629944680966, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 1.0, "sampling/error_ema_max": 0.09385144263505936, "sampling/error_ema_mean": 0.0013033524388447403, "sampling/priority_kl": 0.029195374995470046, "sampling/priority_scale": 419434.37669820746, "sampling/prob_entropy": 10.279756546020508, "sampling/prob_max": 3.438788626226596e-05, "sampling/prob_min": 5.128296152179246e-07, "sampling/prompt_draws_max": 1.0, "sampling/prompt_draws_mean": 0.031199999153614044, "sampling/prompt_draws_total": 936.0, "sampling/seen_fraction": 0.031199999153614044, "sampling/unseen_fraction": 0.968800000846386, "signal/accuracy_reward/centered_abs_mean": 0.31207139492034913, "signal/accuracy_reward/group_std_mean": 0.37355145812034607, "signal/accuracy_reward/group_zero_std_frac": 0.08333333432674409, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.15603569746017457, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.15603569746017457, "signal/advantage_abs_mean": 0.45120006799697876, "signal/advantage_pre_scale_abs_mean": 0.45120006799697876, "signal/advantage_pre_scale_std": 0.541375458240509, "signal/advantage_std": 0.541375458240509, "signal/brier_reward/centered_abs_mean": 0.31599055528640746, "signal/brier_reward/group_std_mean": 0.3715106964111328, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.15799527764320373, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.15799527764320373, "signal/confidence_one_or_zero/centered_abs_mean": 0.401806640625, "signal/confidence_one_or_zero/group_std_mean": 0.4505423367023468, "signal/confidence_one_or_zero/group_zero_std_frac": 0.002777777798473835, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 4.018066374555929e-06, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 4.018066374555929e-06, "signal/format_reward/centered_abs_mean": 0.42190213203430177, "signal/format_reward/group_std_mean": 0.46364007592201234, "signal/format_reward/group_zero_std_frac": 0.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.21095106601715088, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.21095106601715088, "signal/mean_confidence_reward/centered_abs_mean": 0.15849553644657136, "signal/mean_confidence_reward/group_std_mean": 0.23422836661338806, "signal/mean_confidence_reward/group_zero_std_frac": 0.0, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 1.5849552255531307e-06, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 1.5849552255531307e-06, "step": 15 }, { "calibration/aurc": 0.5617025961821958, "calibration/batch_distribution_entropy": 0.30985545137677556, "calibration/batch_entropy_100bins": 0.36597686438896854, "calibration/batch_entropy_10bins": 0.30985545137677556, "calibration/batch_entropy_50bins": 0.4230100127785527, "calibration/batch_uniqueness": 0.5376178366974512, "calibration/confidence_entropy": 0.23943032642759107, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/distribution_entropy_10": 0.30985545137677556, "calibration/distribution_entropy_100": 0.36597686438896854, "calibration/ece": 0.5239923082317872, "calibration/mean_confidence": 0.9072021034937515, "calibration/unique_confidence_per_question": 0.03177083333333333, "calibration/unique_confidences": 12.2, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.018315972222222233, "completions/max_length": 4024.0, "completions/max_terminated_length": 4024.0, "completions/mean_length": 480.8516540527344, "completions/mean_terminated_length": 489.8557922363281, "completions/min_length": 0.0, "completions/min_terminated_length": 19.2, "epoch": 0.04807692307692308, "grad_norm": 0.00305964145809412, "learning_rate": 4.807692307692308e-07, "loss": 0.0029, "num_tokens": 35392660.0, "reward": 0.7069159865379333, "reward_std": 0.5072059988975525, "rewards/accuracy_reward": 0.30347222089767456, "rewards/brier_reward": 0.3697111070156097, "rewards/confidence_one_or_zero": 0.323784726858139, "rewards/format_reward": 0.740625011920929, "rewards/mean_confidence_reward": 0.8573902249336243, "sampling/batch_mean_priority_error": 0.40485228530318923, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.9972222222222221, "sampling/error_ema_max": 0.09483172744512558, "sampling/error_ema_mean": 0.0017890206538140774, "sampling/priority_kl": 0.02999992184340954, "sampling/priority_scale": 4.172456359863281, "sampling/prob_entropy": 10.278950881958007, "sampling/prob_max": 3.469034854788333e-05, "sampling/prob_min": 2.624379021654022e-06, "sampling/prompt_draws_max": 1.2, "sampling/prompt_draws_mean": 0.04320000112056732, "sampling/prompt_draws_total": 1296.0, "sampling/seen_fraction": 0.043193334341049196, "sampling/unseen_fraction": 0.9568066656589508, "signal/accuracy_reward/centered_abs_mean": 0.32134330868721006, "signal/accuracy_reward/group_std_mean": 0.376801460981369, "signal/accuracy_reward/group_zero_std_frac": 0.09166666865348816, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.16067165434360503, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.16067165434360503, "signal/advantage_abs_mean": 0.4259904146194458, "signal/advantage_pre_scale_abs_mean": 0.4259904146194458, "signal/advantage_pre_scale_std": 0.5144033670425415, "signal/advantage_std": 0.5144033670425415, "signal/brier_reward/centered_abs_mean": 0.31932910084724425, "signal/brier_reward/group_std_mean": 0.3702052295207977, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.15966455042362213, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.15966455042362213, "signal/confidence_one_or_zero/centered_abs_mean": 0.39694010019302367, "signal/confidence_one_or_zero/group_std_mean": 0.4463639736175537, "signal/confidence_one_or_zero/group_zero_std_frac": 0.00555555559694767, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 3.969400859205053e-06, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 3.969400859205053e-06, "signal/format_reward/centered_abs_mean": 0.34589843153953553, "signal/format_reward/group_std_mean": 0.4145213305950165, "signal/format_reward/group_zero_std_frac": 0.00555555559694767, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.17294921576976777, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.17294921576976777, "signal/mean_confidence_reward/centered_abs_mean": 0.1368652194738388, "signal/mean_confidence_reward/group_std_mean": 0.20745837390422822, "signal/mean_confidence_reward/group_zero_std_frac": 0.0, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 1.3686520787814516e-06, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 1.3686520787814516e-06, "step": 20 }, { "calibration/aurc": 0.6331574084168297, "calibration/batch_distribution_entropy": 0.3056451127645691, "calibration/batch_entropy_100bins": 0.3621909437209661, "calibration/batch_entropy_10bins": 0.3056451127645691, "calibration/batch_entropy_50bins": 0.4203603709425834, "calibration/batch_uniqueness": 0.5275142501595815, "calibration/confidence_entropy": 0.24152741604416814, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/distribution_entropy_10": 0.3056451127645691, "calibration/distribution_entropy_100": 0.3621909437209661, "calibration/ece": 0.5781299287679891, "calibration/mean_confidence": 0.9073145999485227, "calibration/unique_confidence_per_question": 0.0359375, "calibration/unique_confidences": 13.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014756944444444465, "completions/max_length": 3893.4, "completions/max_terminated_length": 3893.4, "completions/mean_length": 448.126220703125, "completions/mean_terminated_length": 454.97858276367185, "completions/min_length": 0.0, "completions/min_terminated_length": 35.2, "epoch": 0.06009615384615385, "grad_norm": 0.0026125996373593807, "learning_rate": 6.009615384615385e-07, "loss": -0.0093, "num_tokens": 43616866.0, "reward": 0.8259699463844299, "reward_std": 0.4544887006282806, "rewards/accuracy_reward": 0.34383679628372193, "rewards/brier_reward": 0.4322117447853088, "rewards/confidence_one_or_zero": 0.2889756917953491, "rewards/format_reward": 0.8758680701255799, "rewards/mean_confidence_reward": 0.8741405367851257, "sampling/batch_mean_priority_error": 0.3930652402572373, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.9916666666666668, "sampling/error_ema_max": 0.11524507701396942, "sampling/error_ema_mean": 0.002272505545988679, "sampling/priority_kl": 0.030000098794698716, "sampling/priority_scale": 3.1412979844026268, "sampling/prob_entropy": 10.278951835632324, "sampling/prob_max": 3.4941424382850526e-05, "sampling/prob_min": 2.6300597710360306e-06, "sampling/prompt_draws_max": 2.0, "sampling/prompt_draws_mean": 0.0551999993622303, "sampling/prompt_draws_total": 1656.0, "sampling/seen_fraction": 0.05510666668415069, "sampling/unseen_fraction": 0.9448933333158493, "signal/accuracy_reward/centered_abs_mean": 0.3353352904319763, "signal/accuracy_reward/group_std_mean": 0.39114522337913515, "signal/accuracy_reward/group_zero_std_frac": 0.06944444626569748, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.16766764521598815, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.16766764521598815, "signal/advantage_abs_mean": 0.37308090925216675, "signal/advantage_pre_scale_abs_mean": 0.37308090925216675, "signal/advantage_pre_scale_std": 0.46204503178596495, "signal/advantage_std": 0.46204503178596495, "signal/brier_reward/centered_abs_mean": 0.31565704345703127, "signal/brier_reward/group_std_mean": 0.36644824147224425, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.15782852172851564, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.15782852172851564, "signal/confidence_one_or_zero/centered_abs_mean": 0.3759385883808136, "signal/confidence_one_or_zero/group_std_mean": 0.4346859216690063, "signal/confidence_one_or_zero/group_zero_std_frac": 0.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 3.759385663215653e-06, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 3.759385663215653e-06, "signal/format_reward/centered_abs_mean": 0.1980034738779068, "signal/format_reward/group_std_mean": 0.29695329666137693, "signal/format_reward/group_zero_std_frac": 0.06388889048248529, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0990017369389534, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0990017369389534, "signal/mean_confidence_reward/centered_abs_mean": 0.11213915348052979, "signal/mean_confidence_reward/group_std_mean": 0.1708782970905304, "signal/mean_confidence_reward/group_zero_std_frac": 0.0, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 1.121391574088193e-06, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 1.121391574088193e-06, "step": 25 }, { "calibration/aurc": 0.5908165285640636, "calibration/batch_distribution_entropy": 0.2872097804447499, "calibration/batch_entropy_100bins": 0.3629192291377286, "calibration/batch_entropy_10bins": 0.2872097804447499, "calibration/batch_entropy_50bins": 0.42088789860453224, "calibration/batch_uniqueness": 0.5167590103063137, "calibration/confidence_entropy": 0.23885239474768563, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/distribution_entropy_10": 0.2872097804447499, "calibration/distribution_entropy_100": 0.3629192291377286, "calibration/ece": 0.5238896392678473, "calibration/mean_confidence": 0.9082567609618604, "calibration/unique_confidence_per_question": 0.040625, "calibration/unique_confidences": 15.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01119791666666663, "completions/max_length": 3916.0, "completions/max_terminated_length": 3916.0, "completions/mean_length": 455.15147705078124, "completions/mean_terminated_length": 460.3227966308594, "completions/min_length": 0.0, "completions/min_terminated_length": 61.6, "epoch": 0.07211538461538461, "grad_norm": 0.0015309693990275264, "learning_rate": 7.211538461538461e-07, "loss": -0.0117, "num_tokens": 51960627.0, "reward": 0.9048501491546631, "reward_std": 0.40102980136871336, "rewards/accuracy_reward": 0.38498263955116274, "rewards/brier_reward": 0.47920804619789126, "rewards/confidence_one_or_zero": 0.2866319477558136, "rewards/format_reward": 0.9454861044883728, "rewards/mean_confidence_reward": 0.8894478797912597, "sampling/batch_mean_priority_error": 0.38290548384535295, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.986111111111111, "sampling/error_ema_max": 0.12885397672653198, "sampling/error_ema_mean": 0.0027343787252902983, "sampling/priority_kl": 0.029999807104468347, "sampling/priority_scale": 2.616620468813926, "sampling/prob_entropy": 10.278951454162598, "sampling/prob_max": 3.516697615850717e-05, "sampling/prob_min": 3.984351451435941e-06, "sampling/prompt_draws_max": 2.0, "sampling/prompt_draws_mean": 0.0671999990940094, "sampling/prompt_draws_total": 2016.0, "sampling/seen_fraction": 0.06696666553616523, "sampling/unseen_fraction": 0.9330333344638347, "signal/accuracy_reward/centered_abs_mean": 0.3223904073238373, "signal/accuracy_reward/group_std_mean": 0.3792769551277161, "signal/accuracy_reward/group_zero_std_frac": 0.08888889029622078, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.16119520366191864, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.16119520366191864, "signal/advantage_abs_mean": 0.33022568821907045, "signal/advantage_pre_scale_abs_mean": 0.33022568821907045, "signal/advantage_pre_scale_std": 0.41163812279701234, "signal/advantage_std": 0.41163812279701234, "signal/brier_reward/centered_abs_mean": 0.3007372856140137, "signal/brier_reward/group_std_mean": 0.3528658151626587, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.15036864280700685, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.15036864280700685, "signal/confidence_one_or_zero/centered_abs_mean": 0.36525607109069824, "signal/confidence_one_or_zero/group_std_mean": 0.42673117518424986, "signal/confidence_one_or_zero/group_zero_std_frac": 0.002777777798473835, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 3.652560599221033e-06, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 3.652560599221033e-06, "signal/format_reward/centered_abs_mean": 0.0925564244389534, "signal/format_reward/group_std_mean": 0.1731490135192871, "signal/format_reward/group_zero_std_frac": 0.3027777820825577, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0462782122194767, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0462782122194767, "signal/mean_confidence_reward/centered_abs_mean": 0.09154417216777802, "signal/mean_confidence_reward/group_std_mean": 0.14112971723079681, "signal/mean_confidence_reward/group_zero_std_frac": 0.0, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 9.154416829915135e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 9.154416829915135e-07, "step": 30 }, { "calibration/aurc": 0.49865686253089087, "calibration/batch_distribution_entropy": 0.3215165241574327, "calibration/batch_entropy_100bins": 0.36894158251549947, "calibration/batch_entropy_10bins": 0.3215165241574327, "calibration/batch_entropy_50bins": 0.42884229658872697, "calibration/batch_uniqueness": 0.5198789348898746, "calibration/confidence_entropy": 0.2638777012526705, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.08723404255319149, "calibration/coverage@5%": 0.0, "calibration/distribution_entropy_10": 0.3215165241574327, "calibration/distribution_entropy_100": 0.36894158251549947, "calibration/ece": 0.4416422972833175, "calibration/mean_confidence": 0.8992454127844949, "calibration/unique_confidence_per_question": 0.0375, "calibration/unique_confidences": 14.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666652, "completions/max_length": 3919.6, "completions/max_terminated_length": 3919.6, "completions/mean_length": 482.71397705078124, "completions/mean_terminated_length": 487.82008056640626, "completions/min_length": 0.0, "completions/min_terminated_length": 74.0, "epoch": 0.08413461538461539, "grad_norm": 0.0012755130883306265, "learning_rate": 8.41346153846154e-07, "loss": -0.0108, "num_tokens": 60604820.0, "reward": 0.9773583769798279, "reward_std": 0.3674590468406677, "rewards/accuracy_reward": 0.4388888955116272, "rewards/brier_reward": 0.5380276739597321, "rewards/confidence_one_or_zero": 0.23333333134651185, "rewards/format_reward": 0.9777777671813965, "rewards/mean_confidence_reward": 0.8878483891487121, "sampling/batch_mean_priority_error": 0.3344713397737706, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.975, "sampling/error_ema_max": 0.12885397672653198, "sampling/error_ema_mean": 0.003155976487323642, "sampling/priority_kl": 0.03000006154179573, "sampling/priority_scale": 2.2848868130706252, "sampling/prob_entropy": 10.278952980041504, "sampling/prob_max": 3.537431184668094e-05, "sampling/prob_min": 5.246551245363662e-06, "sampling/prompt_draws_max": 2.0, "sampling/prompt_draws_mean": 0.07920000106096267, "sampling/prompt_draws_total": 2376.0, "sampling/seen_fraction": 0.07872666716575623, "sampling/unseen_fraction": 0.9212733328342437, "signal/accuracy_reward/centered_abs_mean": 0.3136827230453491, "signal/accuracy_reward/group_std_mean": 0.3738104820251465, "signal/accuracy_reward/group_zero_std_frac": 0.08611111268401146, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.15684136152267455, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.15684136152267455, "signal/advantage_abs_mean": 0.3014487147331238, "signal/advantage_pre_scale_abs_mean": 0.3014487147331238, "signal/advantage_pre_scale_std": 0.3801884174346924, "signal/advantage_std": 0.3801884174346924, "signal/brier_reward/centered_abs_mean": 0.2799021899700165, "signal/brier_reward/group_std_mean": 0.3350242257118225, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.13995109498500824, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.13995109498500824, "signal/confidence_one_or_zero/centered_abs_mean": 0.31881510019302367, "signal/confidence_one_or_zero/group_std_mean": 0.39367093443870543, "signal/confidence_one_or_zero/group_zero_std_frac": 0.01111111119389534, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 3.1881509130471386e-06, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 3.1881509130471386e-06, "signal/format_reward/centered_abs_mean": 0.04067925326526165, "signal/format_reward/group_std_mean": 0.08876850455999374, "signal/format_reward/group_zero_std_frac": 0.5944444537162781, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.020339626632630826, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.020339626632630826, "signal/mean_confidence_reward/centered_abs_mean": 0.08388432711362839, "signal/mean_confidence_reward/group_std_mean": 0.12709739059209824, "signal/mean_confidence_reward/group_zero_std_frac": 0.0, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 8.388432092942822e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 8.388432092942822e-07, "step": 35 }, { "calibration/aurc": 0.4823138666235419, "calibration/batch_distribution_entropy": 0.3863156724868472, "calibration/batch_entropy_100bins": 0.38263042759035665, "calibration/batch_entropy_10bins": 0.3863156724868472, "calibration/batch_entropy_50bins": 0.44357007596939224, "calibration/batch_uniqueness": 0.524572624992154, "calibration/confidence_entropy": 0.3053943449202666, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.04656084656084656, "calibration/coverage@30%": 0.047089947089947085, "calibration/coverage@5%": 0.0, "calibration/distribution_entropy_10": 0.3863156724868472, "calibration/distribution_entropy_100": 0.38263042759035665, "calibration/ece": 0.4134137900803255, "calibration/mean_confidence": 0.8813164186107763, "calibration/unique_confidence_per_question": 0.04010416666666667, "calibration/unique_confidences": 15.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010677083333333327, "completions/max_length": 3680.4, "completions/max_terminated_length": 3680.4, "completions/mean_length": 512.7624267578125, "completions/mean_terminated_length": 518.3632568359375, "completions/min_length": 0.0, "completions/min_terminated_length": 105.8, "epoch": 0.09615384615384616, "grad_norm": 0.0012608061078935862, "learning_rate": 9.615384615384617e-07, "loss": -0.0087, "num_tokens": 69623683.0, "reward": 1.0160284519195557, "reward_std": 0.34829759001731875, "rewards/accuracy_reward": 0.4693576395511627, "rewards/brier_reward": 0.579345452785492, "rewards/confidence_one_or_zero": 0.15078125, "rewards/format_reward": 0.9833333253860473, "rewards/mean_confidence_reward": 0.8721679806709289, "sampling/batch_mean_priority_error": 0.28377850106167013, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.9694444444444444, "sampling/error_ema_max": 0.12885397672653198, "sampling/error_ema_mean": 0.0035211187321692707, "sampling/priority_kl": 0.030000394210219383, "sampling/priority_scale": 2.052108299965039, "sampling/prob_entropy": 10.278952980041504, "sampling/prob_max": 3.5567307349992916e-05, "sampling/prob_min": 6.397180368367117e-06, "sampling/prompt_draws_max": 2.0, "sampling/prompt_draws_mean": 0.09120000004768372, "sampling/prompt_draws_total": 2736.0, "sampling/seen_fraction": 0.09033999890089035, "sampling/unseen_fraction": 0.9096600010991096, "signal/accuracy_reward/centered_abs_mean": 0.302978515625, "signal/accuracy_reward/group_std_mean": 0.3657167613506317, "signal/accuracy_reward/group_zero_std_frac": 0.08888889029622078, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1514892578125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.1514892578125, "signal/advantage_abs_mean": 0.2835480809211731, "signal/advantage_pre_scale_abs_mean": 0.2835480809211731, "signal/advantage_pre_scale_std": 0.3627303600311279, "signal/advantage_std": 0.3627303600311279, "signal/brier_reward/centered_abs_mean": 0.25960286259651183, "signal/brier_reward/group_std_mean": 0.31365215182304385, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.12980143129825591, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.12980143129825591, "signal/confidence_one_or_zero/centered_abs_mean": 0.2295952707529068, "signal/confidence_one_or_zero/group_std_mean": 0.3169353187084198, "signal/confidence_one_or_zero/group_zero_std_frac": 0.08611111491918563, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 2.2959525495025445e-06, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 2.2959525495025445e-06, "signal/format_reward/centered_abs_mean": 0.03025173619389534, "signal/format_reward/group_std_mean": 0.06552095487713813, "signal/format_reward/group_zero_std_frac": 0.700000011920929, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01512586809694767, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01512586809694767, "signal/mean_confidence_reward/centered_abs_mean": 0.08143319040536881, "signal/mean_confidence_reward/group_std_mean": 0.12138528972864152, "signal/mean_confidence_reward/group_zero_std_frac": 0.0, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 8.143319064402022e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 8.143319064402022e-07, "step": 40 }, { "calibration/aurc": 0.3800912542992382, "calibration/batch_distribution_entropy": 0.35937801231294425, "calibration/batch_entropy_100bins": 0.3474659546744869, "calibration/batch_entropy_10bins": 0.35937801231294425, "calibration/batch_entropy_50bins": 0.4084236105276625, "calibration/batch_uniqueness": 0.4383812566815227, "calibration/confidence_entropy": 0.3172211438602935, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.03622047244094488, "calibration/coverage@20%": 0.05774278215223098, "calibration/coverage@25%": 0.19790026246719158, "calibration/coverage@30%": 0.2, "calibration/coverage@5%": 0.0, "calibration/distribution_entropy_10": 0.35937801231294425, "calibration/distribution_entropy_100": 0.3474659546744869, "calibration/ece": 0.2940468530345476, "calibration/mean_confidence": 0.882873292367725, "calibration/unique_confidence_per_question": 0.028645833333333332, "calibration/unique_confidences": 11.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009201388888888884, "completions/max_length": 4048.2, "completions/max_terminated_length": 4048.2, "completions/mean_length": 559.3792724609375, "completions/mean_terminated_length": 564.5820068359375, "completions/min_length": 0.0, "completions/min_terminated_length": 117.0, "epoch": 0.10817307692307693, "grad_norm": 0.003110303310677409, "learning_rate": 1.0817307692307693e-06, "loss": -0.0034, "num_tokens": 79186324.0, "reward": 1.0602930545806886, "reward_std": 0.306660133600235, "rewards/accuracy_reward": 0.5138888955116272, "rewards/brier_reward": 0.6227369189262391, "rewards/confidence_one_or_zero": 0.10625, "rewards/format_reward": 0.9839409589767456, "rewards/mean_confidence_reward": 0.8621986746788025, "sampling/batch_mean_priority_error": 0.23946882227667854, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.9694444444444444, "sampling/error_ema_max": 0.12885397672653198, "sampling/error_ema_mean": 0.0038303794339299203, "sampling/priority_kl": 0.030000080913305284, "sampling/priority_scale": 1.8766139150131493, "sampling/prob_entropy": 10.278951454162598, "sampling/prob_max": 3.5752353142015636e-05, "sampling/prob_min": 7.404062307614367e-06, "sampling/prompt_draws_max": 2.0, "sampling/prompt_draws_mean": 0.10320000052452087, "sampling/prompt_draws_total": 3096.0, "sampling/seen_fraction": 0.10200666785240173, "sampling/unseen_fraction": 0.8979933321475982, "signal/accuracy_reward/centered_abs_mean": 0.2545138865709305, "signal/accuracy_reward/group_std_mean": 0.3217469334602356, "signal/accuracy_reward/group_zero_std_frac": 0.13888889253139497, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.12725694328546525, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.12725694328546525, "signal/advantage_abs_mean": 0.23891697824001312, "signal/advantage_pre_scale_abs_mean": 0.23891697824001312, "signal/advantage_pre_scale_std": 0.3288706004619598, "signal/advantage_std": 0.3288706004619598, "signal/brier_reward/centered_abs_mean": 0.21723864376544952, "signal/brier_reward/group_std_mean": 0.2735393285751343, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.10861932188272476, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.10861932188272476, "signal/confidence_one_or_zero/centered_abs_mean": 0.1694227397441864, "signal/confidence_one_or_zero/group_std_mean": 0.2620739072561264, "signal/confidence_one_or_zero/group_zero_std_frac": 0.13333333432674407, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.694227375992341e-06, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.694227375992341e-06, "signal/format_reward/centered_abs_mean": 0.02766384519636631, "signal/format_reward/group_std_mean": 0.057002028077840806, "signal/format_reward/group_zero_std_frac": 0.7444444537162781, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.013831922598183156, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.013831922598183156, "signal/mean_confidence_reward/centered_abs_mean": 0.07951802164316177, "signal/mean_confidence_reward/group_std_mean": 0.11687755137681961, "signal/mean_confidence_reward/group_zero_std_frac": 0.0, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 7.951802217576187e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 7.951802217576187e-07, "step": 45 }, { "calibration/aurc": 0.30584174843599155, "calibration/batch_distribution_entropy": 0.4698661757482432, "calibration/batch_entropy_100bins": 0.359959773527773, "calibration/batch_entropy_10bins": 0.4698661757482432, "calibration/batch_entropy_50bins": 0.42373882129655405, "calibration/batch_uniqueness": 0.4616731414535427, "calibration/confidence_entropy": 0.37746819721208036, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.04970530029191465, "calibration/coverage@20%": 0.182396604745731, "calibration/coverage@25%": 0.2594019523393139, "calibration/coverage@30%": 0.5653119338313628, "calibration/coverage@5%": 0.0, "calibration/distribution_entropy_10": 0.4698661757482432, "calibration/distribution_entropy_100": 0.359959773527773, "calibration/ece": 0.22593068011557005, "calibration/mean_confidence": 0.8520271057071687, "calibration/unique_confidence_per_question": 0.03072916666666667, "calibration/unique_confidences": 11.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012065972222222231, "completions/max_length": 3768.4, "completions/max_terminated_length": 3768.4, "completions/mean_length": 568.4737854003906, "completions/mean_terminated_length": 575.4648315429688, "completions/min_length": 0.0, "completions/min_terminated_length": 120.4, "epoch": 0.1201923076923077, "grad_norm": 0.0008945247973315418, "learning_rate": 1.201923076923077e-06, "loss": -0.0054, "num_tokens": 88842246.0, "reward": 1.1304325819015504, "reward_std": 0.2705526381731033, "rewards/accuracy_reward": 0.5894097208976745, "rewards/brier_reward": 0.6880171537399292, "rewards/confidence_one_or_zero": 0.06059027761220932, "rewards/format_reward": 0.9834201335906982, "rewards/mean_confidence_reward": 0.8420838594436646, "sampling/batch_mean_priority_error": 0.1750527001027404, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.9388888888888889, "sampling/error_ema_max": 0.12885397672653198, "sampling/error_ema_mean": 0.004068218637257814, "sampling/priority_kl": 0.030000039935112, "sampling/priority_scale": 1.7397352575790137, "sampling/prob_entropy": 10.278952026367188, "sampling/prob_max": 3.592700959416106e-05, "sampling/prob_min": 8.291824997286312e-06, "sampling/prompt_draws_max": 2.0, "sampling/prompt_draws_mean": 0.11519999951124191, "sampling/prompt_draws_total": 3456.0, "sampling/seen_fraction": 0.11345999985933304, "sampling/unseen_fraction": 0.8865400001406669, "signal/accuracy_reward/centered_abs_mean": 0.21812066435813904, "signal/accuracy_reward/group_std_mean": 0.28616309762001035, "signal/accuracy_reward/group_zero_std_frac": 0.1916666656732559, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.10906033217906952, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.10906033217906952, "signal/advantage_abs_mean": 0.2035945475101471, "signal/advantage_pre_scale_abs_mean": 0.2035945475101471, "signal/advantage_pre_scale_std": 0.3032866775989532, "signal/advantage_std": 0.3032866775989532, "signal/brier_reward/centered_abs_mean": 0.18060558736324311, "signal/brier_reward/group_std_mean": 0.23320825695991515, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.09030279368162156, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.09030279368162156, "signal/confidence_one_or_zero/centered_abs_mean": 0.10205078125, "signal/confidence_one_or_zero/group_std_mean": 0.17942221462726593, "signal/confidence_one_or_zero/group_zero_std_frac": 0.3222222238779068, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.0205078069702722e-06, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.0205078069702722e-06, "signal/format_reward/centered_abs_mean": 0.02902018204331398, "signal/format_reward/group_std_mean": 0.055744080990552905, "signal/format_reward/group_zero_std_frac": 0.7666666746139527, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01451009102165699, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01451009102165699, "signal/mean_confidence_reward/centered_abs_mean": 0.0795755386352539, "signal/mean_confidence_reward/group_std_mean": 0.1120973750948906, "signal/mean_confidence_reward/group_zero_std_frac": 0.0, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 7.957553975757037e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 7.957553975757037e-07, "step": 50 }, { "epoch": 0.1201923076923077, "eval_calibration/aurc": 0.36693158510830576, "eval_calibration/batch_distribution_entropy": 0.4879616019308547, "eval_calibration/batch_entropy_100bins": 0.35135930466067045, "eval_calibration/batch_entropy_10bins": 0.4879616019308547, "eval_calibration/batch_entropy_50bins": 0.41198770433599563, "eval_calibration/batch_uniqueness": 0.4472331427676404, "eval_calibration/confidence_entropy": 0.40263867487886834, "eval_calibration/coverage@0%": 0.0, "eval_calibration/coverage@1%": 0.0, "eval_calibration/coverage@10%": 0.0, "eval_calibration/coverage@15%": 0.0, "eval_calibration/coverage@20%": 0.0, "eval_calibration/coverage@25%": 0.02920353982300885, "eval_calibration/coverage@30%": 0.02920353982300885, "eval_calibration/coverage@5%": 0.0, "eval_calibration/distribution_entropy_10": 0.4879616019308547, "eval_calibration/distribution_entropy_100": 0.35135930466067045, "eval_calibration/ece": 0.2521858407079647, "eval_calibration/mean_confidence": 0.8436902654867258, "eval_calibration/unique_confidence_per_question": 0.013020833333333334, "eval_calibration/unique_confidences": 15, "eval_completions/clipped_ratio": 0.01215277777777779, "eval_completions/max_length": 2192.5, "eval_completions/max_terminated_length": 2192.5, "eval_completions/mean_length": 593.2312622070312, "eval_completions/mean_terminated_length": 600.4430440266927, "eval_completions/min_length": 41.833333333333336, "eval_completions/min_terminated_length": 185.0, "eval_loss": 0.0, "eval_num_tokens": 88842246.0, "eval_reward": 1.1237780451774597, "eval_reward_std": 0.43651104470094043, "eval_rewards/accuracy_reward": 0.5815972089767456, "eval_rewards/brier_reward": 0.6850390136241913, "eval_rewards/confidence_one_or_zero": 0.02951388938042025, "eval_rewards/format_reward": 0.9809027711550394, "eval_rewards/mean_confidence_reward": 0.8281857669353485, "eval_runtime": 204.0598, "eval_samples_per_second": 4.901, "eval_signal/accuracy_reward/centered_abs_mean": 0.4702690988779068, "eval_signal/accuracy_reward/group_std_mean": 0.49234892924626666, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2351345494389534, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2351345494389534, "eval_signal/advantage_abs_mean": 0.406392698486646, "eval_signal/advantage_pre_scale_abs_mean": 0.406392698486646, "eval_signal/advantage_pre_scale_std": 0.4317004084587097, "eval_signal/advantage_std": 0.4317004084587097, "eval_signal/brier_reward/centered_abs_mean": 0.32436902821063995, "eval_signal/brier_reward/group_std_mean": 0.35272473096847534, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.16218451410531998, "eval_signal/brier_reward/weight": 0.5, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.16218451410531998, "eval_signal/confidence_one_or_zero/centered_abs_mean": 0.05544704866285125, "eval_signal/confidence_one_or_zero/group_std_mean": 0.1265144463007649, "eval_signal/confidence_one_or_zero/group_zero_std_frac": 0.416666679084301, "eval_signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 5.544704796041818e-07, "eval_signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "eval_signal/confidence_one_or_zero/weighted_centered_abs_mean": 5.544704796041818e-07, "eval_signal/format_reward/centered_abs_mean": 0.03656684048473835, "eval_signal/format_reward/group_std_mean": 0.09607256638507049, "eval_signal/format_reward/group_zero_std_frac": 0.5000000074505806, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.018283420242369175, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.018283420242369175, "eval_signal/mean_confidence_reward/centered_abs_mean": 0.08548501133918762, "eval_signal/mean_confidence_reward/group_std_mean": 0.13385692487160364, "eval_signal/mean_confidence_reward/group_zero_std_frac": 0.0, "eval_signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 8.54850100040494e-07, "eval_signal/mean_confidence_reward/weight": 9.999999747378752e-06, "eval_signal/mean_confidence_reward/weighted_centered_abs_mean": 8.54850100040494e-07, "eval_steps_per_second": 0.029, "step": 50 }, { "epoch": 0.1201923076923077, "step": 50, "train_probe_calibration/aurc": 0.3232102008686786, "train_probe_calibration/batch_distribution_entropy": 0.5061747512148379, "train_probe_calibration/batch_entropy_100bins": 0.3552038459733368, "train_probe_calibration/batch_entropy_10bins": 0.5061747512148379, "train_probe_calibration/batch_entropy_50bins": 0.41709690460632987, "train_probe_calibration/batch_uniqueness": 0.45442081376408144, "train_probe_calibration/confidence_entropy": 0.40798928804916534, "train_probe_calibration/coverage@0%": 0.0, "train_probe_calibration/coverage@1%": 0.0, "train_probe_calibration/coverage@10%": 0.0, "train_probe_calibration/coverage@15%": 0.0, "train_probe_calibration/coverage@20%": 0.0, "train_probe_calibration/coverage@25%": 0.03353927625772286, "train_probe_calibration/coverage@30%": 0.5189761694616064, "train_probe_calibration/coverage@5%": 0.0, "train_probe_calibration/distribution_entropy_10": 0.5061747512148379, "train_probe_calibration/distribution_entropy_100": 0.3552038459733368, "train_probe_calibration/ece": 0.2209002647837599, "train_probe_calibration/mean_confidence": 0.838729037952339, "train_probe_calibration/unique_confidence_per_question": 0.010416666666666666, "train_probe_calibration/unique_confidences": 12, "train_probe_completions/clipped_ratio": 0.0078125, "train_probe_completions/max_length": 2798.0, "train_probe_completions/max_terminated_length": 2798.0, "train_probe_completions/mean_length": 617.9297180175781, "train_probe_completions/mean_terminated_length": 622.6372578938802, "train_probe_completions/min_length": 43.166666666666664, "train_probe_completions/min_terminated_length": 179.16666666666666, "train_probe_loss": 0.0, "train_probe_num_tokens": 88842246.0, "train_probe_reward": 1.1511651674906414, "train_probe_reward_std": 0.4263061086336772, "train_probe_rewards/accuracy_reward": 0.6076388955116272, "train_probe_rewards/brier_reward": 0.7111672163009644, "train_probe_rewards/confidence_one_or_zero": 0.0329861108524104, "train_probe_rewards/format_reward": 0.9835069477558136, "train_probe_rewards/mean_confidence_reward": 0.824895828962326, "train_probe_runtime": 203.2902, "train_probe_samples_per_second": 4.919, "train_probe_signal/accuracy_reward/centered_abs_mean": 0.4641927083333333, "train_probe_signal/accuracy_reward/group_std_mean": 0.4889263262351354, "train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0, "train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23209635416666666, "train_probe_signal/accuracy_reward/weight": 0.5, "train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.23209635416666666, "train_probe_signal/advantage_abs_mean": 0.3949887653191884, "train_probe_signal/advantage_pre_scale_abs_mean": 0.3949887653191884, "train_probe_signal/advantage_pre_scale_std": 0.4220304836829503, "train_probe_signal/advantage_std": 0.4220304836829503, "train_probe_signal/brier_reward/centered_abs_mean": 0.30813247958819073, "train_probe_signal/brier_reward/group_std_mean": 0.34062348306179047, "train_probe_signal/brier_reward/group_zero_std_frac": 0.0, "train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.15406623979409537, "train_probe_signal/brier_reward/weight": 0.5, "train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.15406623979409537, "train_probe_signal/confidence_one_or_zero/centered_abs_mean": 0.06228298662851254, "train_probe_signal/confidence_one_or_zero/group_std_mean": 0.1442195667574803, "train_probe_signal/confidence_one_or_zero/group_zero_std_frac": 0.3333333432674408, "train_probe_signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 6.228298445876135e-07, "train_probe_signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "train_probe_signal/confidence_one_or_zero/weighted_centered_abs_mean": 6.228298445876135e-07, "train_probe_signal/format_reward/centered_abs_mean": 0.03162977425381541, "train_probe_signal/format_reward/group_std_mean": 0.08433058392256498, "train_probe_signal/format_reward/group_zero_std_frac": 0.5555555696288744, "train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.015814887126907706, "train_probe_signal/format_reward/weight": 0.5, "train_probe_signal/format_reward/weighted_centered_abs_mean": 0.015814887126907706, "train_probe_signal/mean_confidence_reward/centered_abs_mean": 0.08944117774566014, "train_probe_signal/mean_confidence_reward/group_std_mean": 0.13466068108876547, "train_probe_signal/mean_confidence_reward/group_zero_std_frac": 0.0, "train_probe_signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 8.944117553255637e-07, "train_probe_signal/mean_confidence_reward/weight": 9.999999747378752e-06, "train_probe_signal/mean_confidence_reward/weighted_centered_abs_mean": 8.944117553255637e-07, "train_probe_steps_per_second": 0.03 }, { "calibration/aurc": 0.3668222300226895, "calibration/batch_distribution_entropy": 0.5336098111385617, "calibration/batch_entropy_100bins": 0.35464743111118563, "calibration/batch_entropy_10bins": 0.5336098111385617, "calibration/batch_entropy_50bins": 0.41717774105665467, "calibration/batch_uniqueness": 0.47823134437695564, "calibration/confidence_entropy": 0.43410787120525496, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.02074468085106383, "calibration/coverage@20%": 0.12925531914893618, "calibration/coverage@25%": 0.2, "calibration/coverage@30%": 0.21461949754632678, "calibration/coverage@5%": 0.0, "calibration/distribution_entropy_10": 0.5336098111385617, "calibration/distribution_entropy_100": 0.35464743111118563, "calibration/ece": 0.2233237631652508, "calibration/mean_confidence": 0.8207327630559618, "calibration/unique_confidence_per_question": 0.02708333333333333, "calibration/unique_confidences": 10.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01640625, "completions/max_length": 4039.6, "completions/max_terminated_length": 4039.6, "completions/mean_length": 613.0697143554687, "completions/mean_terminated_length": 623.2099731445312, "completions/min_length": 0.0, "completions/min_terminated_length": 120.4, "epoch": 0.13221153846153846, "grad_norm": 0.0007423937786370516, "learning_rate": 1.3221153846153848e-06, "loss": -0.0077, "num_tokens": 98970729.0, "reward": 1.1381091117858886, "reward_std": 0.2668158829212189, "rewards/accuracy_reward": 0.5957465291023254, "rewards/brier_reward": 0.7014619588851929, "rewards/confidence_one_or_zero": 0.02100694477558136, "rewards/format_reward": 0.9789930582046509, "rewards/mean_confidence_reward": 0.810854148864746, "sampling/batch_mean_priority_error": 0.14243097914081077, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.9555555555555555, "sampling/error_ema_max": 0.12885397672653198, "sampling/error_ema_mean": 0.004251902643591166, "sampling/priority_kl": 0.030000291764736176, "sampling/priority_scale": 1.6290871502365918, "sampling/prob_entropy": 10.278952598571777, "sampling/prob_max": 3.609485720517114e-05, "sampling/prob_min": 8.77213133207988e-06, "sampling/prompt_draws_max": 2.2, "sampling/prompt_draws_mean": 0.12720000147819518, "sampling/prompt_draws_total": 3816.0, "sampling/seen_fraction": 0.12482666820287705, "sampling/unseen_fraction": 0.875173331797123, "signal/accuracy_reward/centered_abs_mean": 0.2257758229970932, "signal/accuracy_reward/group_std_mean": 0.2867546260356903, "signal/accuracy_reward/group_zero_std_frac": 0.225, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1128879114985466, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.1128879114985466, "signal/advantage_abs_mean": 0.207180854678154, "signal/advantage_pre_scale_abs_mean": 0.207180854678154, "signal/advantage_pre_scale_std": 0.304352855682373, "signal/advantage_std": 0.304352855682373, "signal/brier_reward/centered_abs_mean": 0.17458488643169404, "signal/brier_reward/group_std_mean": 0.22114146649837493, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.08729244321584702, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.08729244321584702, "signal/confidence_one_or_zero/centered_abs_mean": 0.03837890662252903, "signal/confidence_one_or_zero/group_std_mean": 0.08215054646134376, "signal/confidence_one_or_zero/group_zero_std_frac": 0.6333333492279053, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 3.837890574232006e-07, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 3.837890574232006e-07, "signal/format_reward/centered_abs_mean": 0.0329210065305233, "signal/format_reward/group_std_mean": 0.055089053511619565, "signal/format_reward/group_zero_std_frac": 0.7916666746139527, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01646050326526165, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01646050326526165, "signal/mean_confidence_reward/centered_abs_mean": 0.0847676783800125, "signal/mean_confidence_reward/group_std_mean": 0.11370683014392853, "signal/mean_confidence_reward/group_zero_std_frac": 0.0, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 8.47676710691303e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 8.47676710691303e-07, "step": 55 }, { "calibration/aurc": 0.3340195114348269, "calibration/batch_distribution_entropy": 0.5726832276472693, "calibration/batch_entropy_100bins": 0.3492907326474214, "calibration/batch_entropy_10bins": 0.5726832276472693, "calibration/batch_entropy_50bins": 0.410924505266504, "calibration/batch_uniqueness": 0.4968363979020654, "calibration/confidence_entropy": 0.47785333312889183, "calibration/coverage@0%": 0.0026246719160104987, "calibration/coverage@1%": 0.0026246719160104987, "calibration/coverage@10%": 0.0026246719160104987, "calibration/coverage@15%": 0.0026246719160104987, "calibration/coverage@20%": 0.2347003807622639, "calibration/coverage@25%": 0.34856220657276993, "calibration/coverage@30%": 0.4510416666666667, "calibration/coverage@5%": 0.0026246719160104987, "calibration/distribution_entropy_10": 0.5726832276472693, "calibration/distribution_entropy_100": 0.3492907326474214, "calibration/ece": 0.17780991202081498, "calibration/mean_confidence": 0.7972725700954527, "calibration/unique_confidence_per_question": 0.025, "calibration/unique_confidences": 9.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013975694444444442, "completions/max_length": 3632.8, "completions/max_terminated_length": 3632.8, "completions/mean_length": 610.9026245117187, "completions/mean_terminated_length": 619.6228271484375, "completions/min_length": 0.0, "completions/min_terminated_length": 156.8, "epoch": 0.14423076923076922, "grad_norm": 0.0008636752027086914, "learning_rate": 1.4423076923076922e-06, "loss": -0.008, "num_tokens": 109068231.0, "reward": 1.1724665880203247, "reward_std": 0.24019188582897186, "rewards/accuracy_reward": 0.6282986164093017, "rewards/brier_reward": 0.7318964719772338, "rewards/confidence_one_or_zero": 0.006597222201526165, "rewards/format_reward": 0.9847222208976746, "rewards/mean_confidence_reward": 0.7856779336929322, "sampling/batch_mean_priority_error": 0.10756469318231769, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.9388888888888889, "sampling/error_ema_max": 0.12885397672653198, "sampling/error_ema_mean": 0.004396716970950365, "sampling/priority_kl": 0.02999984547495842, "sampling/priority_scale": 1.5373026251327246, "sampling/prob_entropy": 10.278952980041504, "sampling/prob_max": 3.625838580774143e-05, "sampling/prob_min": 8.104404696496203e-06, "sampling/prompt_draws_max": 3.0, "sampling/prompt_draws_mean": 0.13919999897480012, "sampling/prompt_draws_total": 4176.0, "sampling/seen_fraction": 0.1361933320760727, "sampling/unseen_fraction": 0.8638066679239274, "signal/accuracy_reward/centered_abs_mean": 0.20860460102558137, "signal/accuracy_reward/group_std_mean": 0.2688582241535187, "signal/accuracy_reward/group_zero_std_frac": 0.26666666865348815, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.10430230051279069, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.10430230051279069, "signal/advantage_abs_mean": 0.18264085054397583, "signal/advantage_pre_scale_abs_mean": 0.18264085054397583, "signal/advantage_pre_scale_std": 0.2767543405294418, "signal/advantage_std": 0.2767543405294418, "signal/brier_reward/centered_abs_mean": 0.14837830811738967, "signal/brier_reward/group_std_mean": 0.19285426437854766, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.07418915405869483, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.07418915405869483, "signal/confidence_one_or_zero/centered_abs_mean": 0.012413194589316844, "signal/confidence_one_or_zero/group_std_mean": 0.03018292561173439, "signal/confidence_one_or_zero/group_zero_std_frac": 0.850000011920929, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.2413193815064005e-07, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.2413193815064005e-07, "signal/format_reward/centered_abs_mean": 0.02427300363779068, "signal/format_reward/group_std_mean": 0.04464358240365982, "signal/format_reward/group_zero_std_frac": 0.819444453716278, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01213650181889534, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01213650181889534, "signal/mean_confidence_reward/centered_abs_mean": 0.07837038338184357, "signal/mean_confidence_reward/group_std_mean": 0.10648758858442306, "signal/mean_confidence_reward/group_zero_std_frac": 0.0, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 7.837037969693483e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 7.837037969693483e-07, "step": 60 }, { "calibration/aurc": 0.36082880327797945, "calibration/batch_distribution_entropy": 0.5609276442705863, "calibration/batch_entropy_100bins": 0.3219518099981292, "calibration/batch_entropy_10bins": 0.5609276442705863, "calibration/batch_entropy_50bins": 0.37899646159315503, "calibration/batch_uniqueness": 0.4252066320477523, "calibration/confidence_entropy": 0.5287127000393115, "calibration/coverage@0%": 0.0010540349123026288, "calibration/coverage@1%": 0.0010540349123026288, "calibration/coverage@10%": 0.034124901054034915, "calibration/coverage@15%": 0.034124901054034915, "calibration/coverage@20%": 0.03624130317043703, "calibration/coverage@25%": 0.20264550264550266, "calibration/coverage@30%": 0.24728959286407118, "calibration/coverage@5%": 0.0010540349123026288, "calibration/distribution_entropy_10": 0.5609276442705863, "calibration/distribution_entropy_100": 0.3219518099981292, "calibration/ece": 0.15328771390121856, "calibration/mean_confidence": 0.7603495482266917, "calibration/unique_confidence_per_question": 0.021875, "calibration/unique_confidences": 8.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017013888888888884, "completions/max_length": 4044.0, "completions/max_terminated_length": 4044.0, "completions/mean_length": 667.0696166992187, "completions/mean_terminated_length": 678.5839111328125, "completions/min_length": 0.0, "completions/min_terminated_length": 173.0, "epoch": 0.15625, "grad_norm": 0.0009765251888893545, "learning_rate": 1.5625e-06, "loss": -0.0067, "num_tokens": 119880969.0, "reward": 1.1687859058380128, "reward_std": 0.23864734768867493, "rewards/accuracy_reward": 0.6236979126930237, "rewards/brier_reward": 0.7352131009101868, "rewards/confidence_one_or_zero": 0.001996527845039964, "rewards/format_reward": 0.9786458373069763, "rewards/mean_confidence_reward": 0.7466666579246521, "sampling/batch_mean_priority_error": 0.08287107120565607, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.95, "sampling/error_ema_max": 0.12885397672653198, "sampling/error_ema_mean": 0.0045060371980071064, "sampling/priority_kl": 0.029999903962016107, "sampling/priority_scale": 1.4598986505996436, "sampling/prob_entropy": 10.278952598571777, "sampling/prob_max": 3.6417489900486545e-05, "sampling/prob_min": 8.696256554685532e-06, "sampling/prompt_draws_max": 3.0, "sampling/prompt_draws_mean": 0.1512000024318695, "sampling/prompt_draws_total": 4536.0, "sampling/seen_fraction": 0.14750000238418579, "sampling/unseen_fraction": 0.8524999976158142, "signal/accuracy_reward/centered_abs_mean": 0.207763671875, "signal/accuracy_reward/group_std_mean": 0.26939095854759215, "signal/accuracy_reward/group_zero_std_frac": 0.2444444477558136, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1038818359375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.1038818359375, "signal/advantage_abs_mean": 0.17969182133674622, "signal/advantage_pre_scale_abs_mean": 0.17969182133674622, "signal/advantage_pre_scale_std": 0.27758598923683164, "signal/advantage_std": 0.27758598923683164, "signal/brier_reward/centered_abs_mean": 0.13804238885641099, "signal/brier_reward/group_std_mean": 0.1800360769033432, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.06902119442820549, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.06902119442820549, "signal/confidence_one_or_zero/centered_abs_mean": 0.003835720382630825, "signal/confidence_one_or_zero/group_std_mean": 0.010643551312386989, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9416666865348816, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 3.835720150391353e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 3.835720150391353e-08, "signal/format_reward/centered_abs_mean": 0.03416883647441864, "signal/format_reward/group_std_mean": 0.0599904865026474, "signal/format_reward/group_zero_std_frac": 0.7638888955116272, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01708441823720932, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01708441823720932, "signal/mean_confidence_reward/centered_abs_mean": 0.08167127966880798, "signal/mean_confidence_reward/group_std_mean": 0.11141330301761627, "signal/mean_confidence_reward/group_zero_std_frac": 0.0, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 8.167127930391871e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 8.167127930391871e-07, "step": 65 }, { "calibration/aurc": 0.35516211390845653, "calibration/batch_distribution_entropy": 0.5715761667318768, "calibration/batch_entropy_100bins": 0.31538070674384944, "calibration/batch_entropy_10bins": 0.5715761667318768, "calibration/batch_entropy_50bins": 0.37126106516177676, "calibration/batch_uniqueness": 0.42730865779244337, "calibration/confidence_entropy": 0.5586211599671339, "calibration/coverage@0%": 0.000529100529100529, "calibration/coverage@1%": 0.000529100529100529, "calibration/coverage@10%": 0.000529100529100529, "calibration/coverage@15%": 0.0164021164021164, "calibration/coverage@20%": 0.028042328042328042, "calibration/coverage@25%": 0.13589134691496896, "calibration/coverage@30%": 0.22846310877806938, "calibration/coverage@5%": 0.000529100529100529, "calibration/distribution_entropy_10": 0.5715761667318768, "calibration/distribution_entropy_100": 0.31538070674384944, "calibration/ece": 0.13111338780650672, "calibration/mean_confidence": 0.7339990149186095, "calibration/unique_confidence_per_question": 0.021875, "calibration/unique_confidences": 8.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.018229166666666675, "completions/max_length": 4040.4, "completions/max_terminated_length": 4040.4, "completions/mean_length": 675.2895874023437, "completions/mean_terminated_length": 687.9796997070313, "completions/min_length": 0.0, "completions/min_terminated_length": 204.6, "epoch": 0.16826923076923078, "grad_norm": 0.0007695965468883514, "learning_rate": 1.682692307692308e-06, "loss": -0.0098, "num_tokens": 130770225.0, "reward": 1.1733962297439575, "reward_std": 0.22422350943088531, "rewards/accuracy_reward": 0.6266493082046509, "rewards/brier_reward": 0.7417432546615601, "rewards/confidence_one_or_zero": 0.0006076388934161514, "rewards/format_reward": 0.9783854126930237, "rewards/mean_confidence_reward": 0.716662323474884, "sampling/batch_mean_priority_error": 0.06427063559151758, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.9305555555555556, "sampling/error_ema_max": 0.12885397672653198, "sampling/error_ema_mean": 0.004584580659866333, "sampling/priority_kl": 0.030000272020697594, "sampling/priority_scale": 1.3937638641800731, "sampling/prob_entropy": 10.278952026367188, "sampling/prob_max": 3.6572145472746345e-05, "sampling/prob_min": 9.225038775184658e-06, "sampling/prompt_draws_max": 3.0, "sampling/prompt_draws_mean": 0.16319999992847442, "sampling/prompt_draws_total": 4896.0, "sampling/seen_fraction": 0.15870000123977662, "sampling/unseen_fraction": 0.8412999987602234, "signal/accuracy_reward/centered_abs_mean": 0.1993109792470932, "signal/accuracy_reward/group_std_mean": 0.260605588555336, "signal/accuracy_reward/group_zero_std_frac": 0.27777777910232543, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0996554896235466, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0996554896235466, "signal/advantage_abs_mean": 0.1672658920288086, "signal/advantage_pre_scale_abs_mean": 0.1672658920288086, "signal/advantage_pre_scale_std": 0.2621844053268433, "signal/advantage_std": 0.2621844053268433, "signal/brier_reward/centered_abs_mean": 0.12296129167079925, "signal/brier_reward/group_std_mean": 0.1609590619802475, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.061480645835399625, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.061480645835399625, "signal/confidence_one_or_zero/centered_abs_mean": 0.0011664496385492384, "signal/confidence_one_or_zero/group_std_mean": 0.003138383664190769, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9833333253860473, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.1664495502827777e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.1664495502827777e-08, "signal/format_reward/centered_abs_mean": 0.03267686627805233, "signal/format_reward/group_std_mean": 0.0593119814991951, "signal/format_reward/group_zero_std_frac": 0.7611111164093017, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.016338433139026165, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.016338433139026165, "signal/mean_confidence_reward/centered_abs_mean": 0.07880452424287795, "signal/mean_confidence_reward/group_std_mean": 0.1066696047782898, "signal/mean_confidence_reward/group_zero_std_frac": 0.0, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 7.880452130848425e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 7.880452130848425e-07, "step": 70 }, { "calibration/aurc": 0.2786799113370667, "calibration/batch_distribution_entropy": 0.5479433779303641, "calibration/batch_entropy_100bins": 0.3002895704040479, "calibration/batch_entropy_10bins": 0.5479433779303641, "calibration/batch_entropy_50bins": 0.3534960236351027, "calibration/batch_uniqueness": 0.38395899595671973, "calibration/confidence_entropy": 0.5804533421710255, "calibration/coverage@0%": 0.004290037595336166, "calibration/coverage@1%": 0.004290037595336166, "calibration/coverage@10%": 0.019385679267933262, "calibration/coverage@15%": 0.019385679267933262, "calibration/coverage@20%": 0.08593125961735379, "calibration/coverage@25%": 0.46208589649528753, "calibration/coverage@30%": 0.6349800392679877, "calibration/coverage@5%": 0.004290037595336166, "calibration/distribution_entropy_10": 0.5479433779303641, "calibration/distribution_entropy_100": 0.3002895704040479, "calibration/ece": 0.08396643875579377, "calibration/mean_confidence": 0.7134022072125192, "calibration/unique_confidence_per_question": 0.020833333333333332, "calibration/unique_confidences": 8.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01527777777777779, "completions/max_length": 3922.2, "completions/max_terminated_length": 3922.2, "completions/mean_length": 662.4786743164062, "completions/mean_terminated_length": 672.9049438476562, "completions/min_length": 0.0, "completions/min_terminated_length": 194.4, "epoch": 0.18028846153846154, "grad_norm": 0.0006578608299605548, "learning_rate": 1.8028846153846156e-06, "loss": -0.014, "num_tokens": 141473115.0, "reward": 1.2087363958358766, "reward_std": 0.2085721343755722, "rewards/accuracy_reward": 0.6685763835906983, "rewards/brier_reward": 0.7653754472732544, "rewards/confidence_one_or_zero": 0.0006944444554392249, "rewards/format_reward": 0.9835069417953491, "rewards/mean_confidence_reward": 0.6979835271835327, "sampling/batch_mean_priority_error": 0.050875203025745996, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.9277777777777777, "sampling/error_ema_max": 0.12885397672653198, "sampling/error_ema_mean": 0.004651712626218796, "sampling/priority_kl": 0.030000195279717444, "sampling/priority_scale": 1.336326873442158, "sampling/prob_entropy": 10.278952407836915, "sampling/prob_max": 3.672461898531765e-05, "sampling/prob_min": 9.745861825649627e-06, "sampling/prompt_draws_max": 3.0, "sampling/prompt_draws_mean": 0.17520000040531158, "sampling/prompt_draws_total": 5256.0, "sampling/seen_fraction": 0.16990666687488556, "sampling/unseen_fraction": 0.8300933331251145, "signal/accuracy_reward/centered_abs_mean": 0.18960503339767457, "signal/accuracy_reward/group_std_mean": 0.2473952054977417, "signal/accuracy_reward/group_zero_std_frac": 0.3083333373069763, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09480251669883728, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09480251669883728, "signal/advantage_abs_mean": 0.15543261170387268, "signal/advantage_pre_scale_abs_mean": 0.15543261170387268, "signal/advantage_pre_scale_std": 0.2481265187263489, "signal/advantage_std": 0.2481265187263489, "signal/brier_reward/centered_abs_mean": 0.11165534853935241, "signal/brier_reward/group_std_mean": 0.1463429093360901, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.055827674269676206, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.055827674269676206, "signal/confidence_one_or_zero/centered_abs_mean": 0.0013346353895030915, "signal/confidence_one_or_zero/group_std_mean": 0.0036294300109148024, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9805555462837219, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.3346353711085613e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.3346353711085613e-08, "signal/format_reward/centered_abs_mean": 0.02710503488779068, "signal/format_reward/group_std_mean": 0.05075793266296387, "signal/format_reward/group_zero_std_frac": 0.7916666626930237, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01355251744389534, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01355251744389534, "signal/mean_confidence_reward/centered_abs_mean": 0.07513455003499984, "signal/mean_confidence_reward/group_std_mean": 0.10091831386089326, "signal/mean_confidence_reward/group_zero_std_frac": 0.0, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 7.513454534091579e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 7.513454534091579e-07, "step": 75 }, { "calibration/aurc": 0.3689440863507865, "calibration/batch_distribution_entropy": 0.5184190021494378, "calibration/batch_entropy_100bins": 0.2806227972233616, "calibration/batch_entropy_10bins": 0.5184190021494378, "calibration/batch_entropy_50bins": 0.33034461645252283, "calibration/batch_uniqueness": 0.3326310367116993, "calibration/confidence_entropy": 0.6032100330460969, "calibration/coverage@0%": 0.001058201058201058, "calibration/coverage@1%": 0.001058201058201058, "calibration/coverage@10%": 0.001058201058201058, "calibration/coverage@15%": 0.004713553538618813, "calibration/coverage@20%": 0.004713553538618813, "calibration/coverage@25%": 0.004713553538618813, "calibration/coverage@30%": 0.39896942821224807, "calibration/coverage@5%": 0.001058201058201058, "calibration/distribution_entropy_10": 0.5184190021494378, "calibration/distribution_entropy_100": 0.2806227972233616, "calibration/ece": 0.1048484709501376, "calibration/mean_confidence": 0.6910483483132289, "calibration/unique_confidence_per_question": 0.023437499999999997, "calibration/unique_confidences": 9.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011458333333333348, "completions/max_length": 3935.8, "completions/max_terminated_length": 3935.8, "completions/mean_length": 688.7568603515625, "completions/mean_terminated_length": 696.784912109375, "completions/min_length": 0.0, "completions/min_terminated_length": 183.4, "epoch": 0.19230769230769232, "grad_norm": 0.000616310047917068, "learning_rate": 1.9230769230769234e-06, "loss": -0.0075, "num_tokens": 152522666.0, "reward": 1.1828741550445556, "reward_std": 0.2077293336391449, "rewards/accuracy_reward": 0.6256944417953492, "rewards/brier_reward": 0.7528873682022095, "rewards/confidence_one_or_zero": 0.0001736111124046147, "rewards/format_reward": 0.9871527791023255, "rewards/mean_confidence_reward": 0.6841449737548828, "sampling/batch_mean_priority_error": 0.047214103962108844, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.9166666666666667, "sampling/error_ema_max": 0.12885397672653198, "sampling/error_ema_mean": 0.0047060281969606875, "sampling/priority_kl": 0.030000150203704834, "sampling/priority_scale": 1.2860350491013377, "sampling/prob_entropy": 10.278951644897461, "sampling/prob_max": 3.6873909994028506e-05, "sampling/prob_min": 1.0286198084941134e-05, "sampling/prompt_draws_max": 3.0, "sampling/prompt_draws_mean": 0.18720000088214875, "sampling/prompt_draws_total": 5616.0, "sampling/seen_fraction": 0.18101999759674073, "sampling/unseen_fraction": 0.8189800024032593, "signal/accuracy_reward/centered_abs_mean": 0.20045573115348816, "signal/accuracy_reward/group_std_mean": 0.2571284741163254, "signal/accuracy_reward/group_zero_std_frac": 0.29166666269302366, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.10022786557674408, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.10022786557674408, "signal/advantage_abs_mean": 0.1562511533498764, "signal/advantage_pre_scale_abs_mean": 0.1562511533498764, "signal/advantage_pre_scale_std": 0.24353698790073394, "signal/advantage_std": 0.24353698790073394, "signal/brier_reward/centered_abs_mean": 0.1068439707159996, "signal/brier_reward/group_std_mean": 0.14024724662303925, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0534219853579998, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.0534219853579998, "signal/confidence_one_or_zero/centered_abs_mean": 0.0003363715251907706, "signal/confidence_one_or_zero/group_std_mean": 0.0009820926934480667, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9944444417953491, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 3.3637149954302003e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 3.3637149954302003e-09, "signal/format_reward/centered_abs_mean": 0.02290581576526165, "signal/format_reward/group_std_mean": 0.0464916467666626, "signal/format_reward/group_zero_std_frac": 0.7972222328186035, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.011452907882630824, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.011452907882630824, "signal/mean_confidence_reward/centered_abs_mean": 0.06937960833311081, "signal/mean_confidence_reward/group_std_mean": 0.09569962471723556, "signal/mean_confidence_reward/group_zero_std_frac": 0.0, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.937960620234662e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.937960620234662e-07, "step": 80 }, { "calibration/aurc": 0.34587047138080185, "calibration/batch_distribution_entropy": 0.4954573795095432, "calibration/batch_entropy_100bins": 0.27405746184331764, "calibration/batch_entropy_10bins": 0.4954573795095432, "calibration/batch_entropy_50bins": 0.3226160098693715, "calibration/batch_uniqueness": 0.3000675734530054, "calibration/confidence_entropy": 0.6101426873701342, "calibration/coverage@0%": 0.006401699894469639, "calibration/coverage@1%": 0.006401699894469639, "calibration/coverage@10%": 0.006401699894469639, "calibration/coverage@15%": 0.006401699894469639, "calibration/coverage@20%": 0.03345740281224152, "calibration/coverage@25%": 0.23664042668492055, "calibration/coverage@30%": 0.4647471549585009, "calibration/coverage@5%": 0.006401699894469639, "calibration/distribution_entropy_10": 0.4954573795095432, "calibration/distribution_entropy_100": 0.27405746184331764, "calibration/ece": 0.10014490493171968, "calibration/mean_confidence": 0.6821050410005329, "calibration/unique_confidence_per_question": 0.021875, "calibration/unique_confidences": 8.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014843749999999978, "completions/max_length": 3979.0, "completions/max_terminated_length": 3979.0, "completions/mean_length": 667.094970703125, "completions/mean_terminated_length": 677.13818359375, "completions/min_length": 0.0, "completions/min_terminated_length": 175.6, "epoch": 0.20432692307692307, "grad_norm": 0.0007630718173459172, "learning_rate": 2.043269230769231e-06, "loss": -0.0113, "num_tokens": 163328432.0, "reward": 1.1916515588760377, "reward_std": 0.21311030983924867, "rewards/accuracy_reward": 0.6413194537162781, "rewards/brier_reward": 0.7580292105674744, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9839409828186035, "rewards/mean_confidence_reward": 0.6716553926467895, "sampling/batch_mean_priority_error": 0.042610298929097075, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.875, "sampling/error_ema_max": 0.12885397672653198, "sampling/error_ema_mean": 0.004756578896194697, "sampling/priority_kl": 0.029999878257513046, "sampling/priority_scale": 1.241810357524082, "sampling/prob_entropy": 10.278952026367188, "sampling/prob_max": 3.7016544956713916e-05, "sampling/prob_min": 1.0765067963802722e-05, "sampling/prompt_draws_max": 3.0, "sampling/prompt_draws_mean": 0.19919999837875366, "sampling/prompt_draws_total": 5976.0, "sampling/seen_fraction": 0.19177333116531373, "sampling/unseen_fraction": 0.8082266688346863, "signal/accuracy_reward/centered_abs_mean": 0.20234375, "signal/accuracy_reward/group_std_mean": 0.26508465111255647, "signal/accuracy_reward/group_zero_std_frac": 0.2527777820825577, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.101171875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.101171875, "signal/advantage_abs_mean": 0.15687242150306702, "signal/advantage_pre_scale_abs_mean": 0.15687242150306702, "signal/advantage_pre_scale_std": 0.2482043296098709, "signal/advantage_std": 0.2482043296098709, "signal/brier_reward/centered_abs_mean": 0.1052109107375145, "signal/brier_reward/group_std_mean": 0.13991140723228454, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.05260545536875725, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.05260545536875725, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.02864040844142437, "signal/format_reward/group_std_mean": 0.05566352233290672, "signal/format_reward/group_zero_std_frac": 0.7694444537162781, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.014320204220712184, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.014320204220712184, "signal/mean_confidence_reward/centered_abs_mean": 0.0705166518688202, "signal/mean_confidence_reward/group_std_mean": 0.09815174639225006, "signal/mean_confidence_reward/group_zero_std_frac": 0.0, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 7.05166485204245e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 7.05166485204245e-07, "step": 85 }, { "calibration/aurc": 0.359621361820331, "calibration/batch_distribution_entropy": 0.4875542266129493, "calibration/batch_entropy_100bins": 0.27016203664181926, "calibration/batch_entropy_10bins": 0.4875542266129493, "calibration/batch_entropy_50bins": 0.31803037834961934, "calibration/batch_uniqueness": 0.3108004230223583, "calibration/confidence_entropy": 0.6185976385079931, "calibration/coverage@0%": 0.0005235602094240838, "calibration/coverage@1%": 0.0005235602094240838, "calibration/coverage@10%": 0.027081825792079912, "calibration/coverage@15%": 0.030875863732459312, "calibration/coverage@20%": 0.030875863732459312, "calibration/coverage@25%": 0.030875863732459312, "calibration/coverage@30%": 0.0676212705566063, "calibration/coverage@5%": 0.0005235602094240838, "calibration/distribution_entropy_10": 0.4875542266129493, "calibration/distribution_entropy_100": 0.27016203664181926, "calibration/ece": 0.09774914445477971, "calibration/mean_confidence": 0.6697879296328936, "calibration/unique_confidence_per_question": 0.023437500000000003, "calibration/unique_confidences": 9.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013888888888888905, "completions/max_length": 3912.4, "completions/max_terminated_length": 3912.4, "completions/mean_length": 688.4250854492187, "completions/mean_terminated_length": 698.1601928710937, "completions/min_length": 0.0, "completions/min_terminated_length": 199.0, "epoch": 0.21634615384615385, "grad_norm": 0.0006718159420415759, "learning_rate": 2.1634615384615387e-06, "loss": -0.0107, "num_tokens": 174374161.0, "reward": 1.190553903579712, "reward_std": 0.21019268929958343, "rewards/accuracy_reward": 0.6362847208976745, "rewards/brier_reward": 0.7601742506027221, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9846354126930237, "rewards/mean_confidence_reward": 0.6664713621139526, "sampling/batch_mean_priority_error": 0.039588163981791394, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.8833333333333334, "sampling/error_ema_max": 0.12885397672653198, "sampling/error_ema_mean": 0.004800818022340536, "sampling/priority_kl": 0.029999977350234984, "sampling/priority_scale": 1.2025536180008203, "sampling/prob_entropy": 10.278952026367188, "sampling/prob_max": 3.715234051924199e-05, "sampling/prob_min": 1.1198205720575061e-05, "sampling/prompt_draws_max": 3.0, "sampling/prompt_draws_mean": 0.21120000183582305, "sampling/prompt_draws_total": 6336.0, "sampling/seen_fraction": 0.20212666392326356, "sampling/unseen_fraction": 0.7978733360767365, "signal/accuracy_reward/centered_abs_mean": 0.20233290195465087, "signal/accuracy_reward/group_std_mean": 0.2649902403354645, "signal/accuracy_reward/group_zero_std_frac": 0.26388888657093046, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.10116645097732543, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.10116645097732543, "signal/advantage_abs_mean": 0.15588796138763428, "signal/advantage_pre_scale_abs_mean": 0.15588796138763428, "signal/advantage_pre_scale_std": 0.24400971829891205, "signal/advantage_std": 0.24400971829891205, "signal/brier_reward/centered_abs_mean": 0.10318200141191483, "signal/brier_reward/group_std_mean": 0.13609919250011443, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.051591000705957415, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.051591000705957415, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.02562391422688961, "signal/format_reward/group_std_mean": 0.047272267565131185, "signal/format_reward/group_zero_std_frac": 0.8055555582046509, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.012811957113444804, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.012811957113444804, "signal/mean_confidence_reward/centered_abs_mean": 0.06962320357561111, "signal/mean_confidence_reward/group_std_mean": 0.09376237392425538, "signal/mean_confidence_reward/group_zero_std_frac": 0.0, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.962320298953273e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.962320298953273e-07, "step": 90 }, { "calibration/aurc": 0.360027708313517, "calibration/batch_distribution_entropy": 0.4813050950400042, "calibration/batch_entropy_100bins": 0.2578209922325956, "calibration/batch_entropy_10bins": 0.4813050950400042, "calibration/batch_entropy_50bins": 0.30350270054750683, "calibration/batch_uniqueness": 0.28367851731747284, "calibration/confidence_entropy": 0.6131506525449577, "calibration/coverage@0%": 0.002114180473319751, "calibration/coverage@1%": 0.002114180473319751, "calibration/coverage@10%": 0.002114180473319751, "calibration/coverage@15%": 0.002114180473319751, "calibration/coverage@20%": 0.07943111411176369, "calibration/coverage@25%": 0.11958632788345835, "calibration/coverage@30%": 0.305080325157146, "calibration/coverage@5%": 0.002114180473319751, "calibration/distribution_entropy_10": 0.4813050950400042, "calibration/distribution_entropy_100": 0.2578209922325956, "calibration/ece": 0.1002862750340386, "calibration/mean_confidence": 0.6807791476508745, "calibration/unique_confidence_per_question": 0.01875, "calibration/unique_confidences": 7.2, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01059027777777779, "completions/max_length": 3668.4, "completions/max_terminated_length": 3668.4, "completions/mean_length": 644.8259643554687, "completions/mean_terminated_length": 651.8298095703125, "completions/min_length": 0.0, "completions/min_terminated_length": 194.4, "epoch": 0.2283653846153846, "grad_norm": 0.0006763488054275513, "learning_rate": 2.283653846153846e-06, "loss": -0.0092, "num_tokens": 184896156.0, "reward": 1.1970339298248291, "reward_std": 0.1976634830236435, "rewards/accuracy_reward": 0.6417534589767456, "rewards/brier_reward": 0.763932728767395, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9883680462837219, "rewards/mean_confidence_reward": 0.6744001626968383, "sampling/batch_mean_priority_error": 0.03951753118071802, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.9027777777777779, "sampling/error_ema_max": 0.12885397672653198, "sampling/error_ema_mean": 0.004846246726810932, "sampling/priority_kl": 0.02999996431171894, "sampling/priority_scale": 1.167076528025791, "sampling/prob_entropy": 10.278952407836915, "sampling/prob_max": 3.729506061063148e-05, "sampling/prob_min": 1.1645950689853634e-05, "sampling/prompt_draws_max": 3.0, "sampling/prompt_draws_mean": 0.22319999933242798, "sampling/prompt_draws_total": 6696.0, "sampling/seen_fraction": 0.21301333606243134, "sampling/unseen_fraction": 0.7869866639375687, "signal/accuracy_reward/centered_abs_mean": 0.19283311665058137, "signal/accuracy_reward/group_std_mean": 0.2522544801235199, "signal/accuracy_reward/group_zero_std_frac": 0.2888889014720917, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09641655832529068, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09641655832529068, "signal/advantage_abs_mean": 0.14698622226715088, "signal/advantage_pre_scale_abs_mean": 0.14698622226715088, "signal/advantage_pre_scale_std": 0.23368164300918579, "signal/advantage_std": 0.23368164300918579, "signal/brier_reward/centered_abs_mean": 0.0987538680434227, "signal/brier_reward/group_std_mean": 0.1296857550740242, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04937693402171135, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.04937693402171135, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.019194878451526166, "signal/format_reward/group_std_mean": 0.037654457986354826, "signal/format_reward/group_zero_std_frac": 0.8388888835906982, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.009597439225763083, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.009597439225763083, "signal/mean_confidence_reward/centered_abs_mean": 0.06403515413403511, "signal/mean_confidence_reward/group_std_mean": 0.08662768602371215, "signal/mean_confidence_reward/group_zero_std_frac": 0.0, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.403514930752862e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.403514930752862e-07, "step": 95 }, { "calibration/aurc": 0.3596962699336317, "calibration/batch_distribution_entropy": 0.49572307893281886, "calibration/batch_entropy_100bins": 0.2738978163454987, "calibration/batch_entropy_10bins": 0.49572307893281886, "calibration/batch_entropy_50bins": 0.32242807777238164, "calibration/batch_uniqueness": 0.3183046592648424, "calibration/confidence_entropy": 0.6086933485955659, "calibration/coverage@0%": 0.0021108179419525065, "calibration/coverage@1%": 0.0021108179419525065, "calibration/coverage@10%": 0.0021108179419525065, "calibration/coverage@15%": 0.05593667546174143, "calibration/coverage@20%": 0.12996273000957584, "calibration/coverage@25%": 0.12996273000957584, "calibration/coverage@30%": 0.38856823990575723, "calibration/coverage@5%": 0.0021108179419525065, "calibration/distribution_entropy_10": 0.49572307893281886, "calibration/distribution_entropy_100": 0.2738978163454987, "calibration/ece": 0.12810381554201036, "calibration/mean_confidence": 0.6824774719423579, "calibration/unique_confidence_per_question": 0.0234375, "calibration/unique_confidences": 9.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013541666666666652, "completions/max_length": 3520.8, "completions/max_terminated_length": 3520.8, "completions/mean_length": 646.881005859375, "completions/mean_terminated_length": 655.8904174804687, "completions/min_length": 0.0, "completions/min_terminated_length": 188.6, "epoch": 0.2403846153846154, "grad_norm": 0.0006148848915472627, "learning_rate": 2.403846153846154e-06, "loss": -0.0111, "num_tokens": 195449921.0, "reward": 1.1851803302764892, "reward_std": 0.19183639883995057, "rewards/accuracy_reward": 0.6269965291023254, "rewards/brier_reward": 0.7580206274986268, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9853298664093018, "rewards/mean_confidence_reward": 0.680933153629303, "sampling/batch_mean_priority_error": 0.04106042591276217, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.888888888888889, "sampling/error_ema_max": 0.12885397672653198, "sampling/error_ema_mean": 0.004891505371779204, "sampling/priority_kl": 0.030000001937150956, "sampling/priority_scale": 1.1352285265456885, "sampling/prob_entropy": 10.278952980041504, "sampling/prob_max": 3.743201086763293e-05, "sampling/prob_min": 1.2065826558682602e-05, "sampling/prompt_draws_max": 3.0, "sampling/prompt_draws_mean": 0.23519999980926515, "sampling/prompt_draws_total": 7056.0, "sampling/seen_fraction": 0.22354000210762023, "sampling/unseen_fraction": 0.7764599978923797, "signal/accuracy_reward/centered_abs_mean": 0.17950846254825592, "signal/accuracy_reward/group_std_mean": 0.23621359169483186, "signal/accuracy_reward/group_zero_std_frac": 0.33333333730697634, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08975423127412796, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08975423127412796, "signal/advantage_abs_mean": 0.14255423545837403, "signal/advantage_pre_scale_abs_mean": 0.14255423545837403, "signal/advantage_pre_scale_std": 0.2318326562643051, "signal/advantage_std": 0.2318326562643051, "signal/brier_reward/centered_abs_mean": 0.09948206096887588, "signal/brier_reward/group_std_mean": 0.1306991159915924, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04974103048443794, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.04974103048443794, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.023301866464316846, "signal/format_reward/group_std_mean": 0.04023935534060001, "signal/format_reward/group_zero_std_frac": 0.8472222328186035, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.011650933232158423, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.011650933232158423, "signal/mean_confidence_reward/centered_abs_mean": 0.06637315228581428, "signal/mean_confidence_reward/group_std_mean": 0.0889115497469902, "signal/mean_confidence_reward/group_zero_std_frac": 0.0, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.637314868385147e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.637314868385147e-07, "step": 100 }, { "epoch": 0.2403846153846154, "eval_calibration/aurc": 0.3264960431700285, "eval_calibration/batch_distribution_entropy": 0.5035579377534389, "eval_calibration/batch_entropy_100bins": 0.2686276995956658, "eval_calibration/batch_entropy_10bins": 0.5035579377534389, "eval_calibration/batch_entropy_50bins": 0.31622418160425286, "eval_calibration/batch_uniqueness": 0.3147685317330474, "eval_calibration/confidence_entropy": 0.5989185390589775, "eval_calibration/coverage@0%": 0.0, "eval_calibration/coverage@1%": 0.0, "eval_calibration/coverage@10%": 0.0, "eval_calibration/coverage@15%": 0.0, "eval_calibration/coverage@20%": 0.0, "eval_calibration/coverage@25%": 0.0, "eval_calibration/coverage@30%": 0.25769569041336854, "eval_calibration/coverage@5%": 0.0, "eval_calibration/distribution_entropy_10": 0.5035579377534389, "eval_calibration/distribution_entropy_100": 0.2686276995956658, "eval_calibration/ece": 0.06099384344766924, "eval_calibration/mean_confidence": 0.6955584872471415, "eval_calibration/unique_confidence_per_question": 0.011284722222222222, "eval_calibration/unique_confidences": 13, "eval_completions/clipped_ratio": 0.010416666666666666, "eval_completions/max_length": 2317.3333333333335, "eval_completions/max_terminated_length": 2317.3333333333335, "eval_completions/mean_length": 641.1512349446615, "eval_completions/mean_terminated_length": 647.976816813151, "eval_completions/min_length": 49.0, "eval_completions/min_terminated_length": 226.0, "eval_loss": 0.0, "eval_num_tokens": 195449921.0, "eval_reward": 1.1953096787134807, "eval_reward_std": 0.3530424237251282, "eval_rewards/accuracy_reward": 0.6423611144224802, "eval_rewards/brier_reward": 0.7612651983896891, "eval_rewards/confidence_one_or_zero": 0.0, "eval_rewards/format_reward": 0.9869791567325592, "eval_rewards/mean_confidence_reward": 0.6865017215410868, "eval_runtime": 203.5004, "eval_samples_per_second": 4.914, "eval_signal/accuracy_reward/centered_abs_mean": 0.4403211822112401, "eval_signal/accuracy_reward/group_std_mean": 0.4756300499041875, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.22016059110562006, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.22016059110562006, "eval_signal/advantage_abs_mean": 0.3166052947441737, "eval_signal/advantage_pre_scale_abs_mean": 0.3166052947441737, "eval_signal/advantage_pre_scale_std": 0.35058291256427765, "eval_signal/advantage_std": 0.35058291256427765, "eval_signal/brier_reward/centered_abs_mean": 0.17967990785837173, "eval_signal/brier_reward/group_std_mean": 0.21290371815363565, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.08983995392918587, "eval_signal/brier_reward/weight": 0.5, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.08983995392918587, "eval_signal/confidence_one_or_zero/centered_abs_mean": 0.0, "eval_signal/confidence_one_or_zero/group_std_mean": 0.0, "eval_signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "eval_signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "eval_signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/centered_abs_mean": 0.025010850590964157, "eval_signal/format_reward/group_std_mean": 0.06767813799281915, "eval_signal/format_reward/group_zero_std_frac": 0.6388889054457346, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.012505425295482079, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.012505425295482079, "eval_signal/mean_confidence_reward/centered_abs_mean": 0.07078721622625987, "eval_signal/mean_confidence_reward/group_std_mean": 0.10642425095041592, "eval_signal/mean_confidence_reward/group_zero_std_frac": 0.0, "eval_signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 7.078721561507942e-07, "eval_signal/mean_confidence_reward/weight": 9.999999747378752e-06, "eval_signal/mean_confidence_reward/weighted_centered_abs_mean": 7.078721561507942e-07, "eval_steps_per_second": 0.029, "step": 100 }, { "epoch": 0.2403846153846154, "step": 100, "train_probe_calibration/aurc": 0.2618583200446606, "train_probe_calibration/batch_distribution_entropy": 0.5051684118856261, "train_probe_calibration/batch_entropy_100bins": 0.2697463841725647, "train_probe_calibration/batch_entropy_10bins": 0.5051684118856261, "train_probe_calibration/batch_entropy_50bins": 0.3175410789880135, "train_probe_calibration/batch_uniqueness": 0.31938374294618566, "train_probe_calibration/confidence_entropy": 0.6020218923419944, "train_probe_calibration/coverage@0%": 0.007029876977152899, "train_probe_calibration/coverage@1%": 0.007029876977152899, "train_probe_calibration/coverage@10%": 0.007029876977152899, "train_probe_calibration/coverage@15%": 0.007029876977152899, "train_probe_calibration/coverage@20%": 0.2398945518453427, "train_probe_calibration/coverage@25%": 0.2398945518453427, "train_probe_calibration/coverage@30%": 0.710896309314587, "train_probe_calibration/coverage@5%": 0.007029876977152899, "train_probe_calibration/distribution_entropy_10": 0.5051684118856261, "train_probe_calibration/distribution_entropy_100": 0.2697463841725647, "train_probe_calibration/ece": 0.039543057996485054, "train_probe_calibration/mean_confidence": 0.6918277680140598, "train_probe_calibration/unique_confidence_per_question": 0.008680555555555556, "train_probe_calibration/unique_confidences": 10, "train_probe_completions/clipped_ratio": 0.013715277777777793, "train_probe_completions/max_length": 2729.1666666666665, "train_probe_completions/max_terminated_length": 2729.1666666666665, "train_probe_completions/mean_length": 639.4520975748698, "train_probe_completions/mean_terminated_length": 648.3664245605469, "train_probe_completions/min_length": 0.0, "train_probe_completions/min_terminated_length": 193.5, "train_probe_loss": 0.0, "train_probe_num_tokens": 195449921.0, "train_probe_reward": 1.2270989616711934, "train_probe_reward_std": 0.34227528671423596, "train_probe_rewards/accuracy_reward": 0.683159718910853, "train_probe_rewards/brier_reward": 0.7831770877043406, "train_probe_rewards/confidence_one_or_zero": 0.0, "train_probe_rewards/format_reward": 0.987847218910853, "train_probe_rewards/mean_confidence_reward": 0.6834201316038767, "train_probe_runtime": 200.9406, "train_probe_samples_per_second": 4.977, "train_probe_signal/accuracy_reward/centered_abs_mean": 0.4219292501608531, "train_probe_signal/accuracy_reward/group_std_mean": 0.4654071430365245, "train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0, "train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.21096462508042654, "train_probe_signal/accuracy_reward/weight": 0.5, "train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.21096462508042654, "train_probe_signal/advantage_abs_mean": 0.3004860281944275, "train_probe_signal/advantage_pre_scale_abs_mean": 0.3004860281944275, "train_probe_signal/advantage_pre_scale_std": 0.33957551916440326, "train_probe_signal/advantage_std": 0.33957551916440326, "train_probe_signal/brier_reward/centered_abs_mean": 0.16428263982137045, "train_probe_signal/brier_reward/group_std_mean": 0.1990672672788302, "train_probe_signal/brier_reward/group_zero_std_frac": 0.0, "train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.08214131991068523, "train_probe_signal/brier_reward/weight": 0.5, "train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.08214131991068523, "train_probe_signal/confidence_one_or_zero/centered_abs_mean": 0.0, "train_probe_signal/confidence_one_or_zero/group_std_mean": 0.0, "train_probe_signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "train_probe_signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "train_probe_signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "train_probe_signal/format_reward/centered_abs_mean": 0.023437499689559143, "train_probe_signal/format_reward/group_std_mean": 0.06575708525876205, "train_probe_signal/format_reward/group_zero_std_frac": 0.6388889104127884, "train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.011718749844779571, "train_probe_signal/format_reward/weight": 0.5, "train_probe_signal/format_reward/weighted_centered_abs_mean": 0.011718749844779571, "train_probe_signal/mean_confidence_reward/centered_abs_mean": 0.06948784242073695, "train_probe_signal/mean_confidence_reward/group_std_mean": 0.10532646502057712, "train_probe_signal/mean_confidence_reward/group_zero_std_frac": 0.0, "train_probe_signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.948784611419493e-07, "train_probe_signal/mean_confidence_reward/weight": 9.999999747378752e-06, "train_probe_signal/mean_confidence_reward/weighted_centered_abs_mean": 6.948784611419493e-07, "train_probe_steps_per_second": 0.03 }, { "calibration/aurc": 0.25365423358695904, "calibration/batch_distribution_entropy": 0.48674473663307144, "calibration/batch_entropy_100bins": 0.26873184440469255, "calibration/batch_entropy_10bins": 0.48674473663307144, "calibration/batch_entropy_50bins": 0.3163467791883904, "calibration/batch_uniqueness": 0.29883804015135357, "calibration/confidence_entropy": 0.603367084818194, "calibration/coverage@0%": 0.0015706878041144342, "calibration/coverage@1%": 0.0015706878041144342, "calibration/coverage@10%": 0.05157068780411443, "calibration/coverage@15%": 0.19297012061388755, "calibration/coverage@20%": 0.32617990848146156, "calibration/coverage@25%": 0.37274075504230814, "calibration/coverage@30%": 0.6159436880090207, "calibration/coverage@5%": 0.0015706878041144342, "calibration/distribution_entropy_10": 0.48674473663307144, "calibration/distribution_entropy_100": 0.26873184440469255, "calibration/ece": 0.0912085220880512, "calibration/mean_confidence": 0.6891120219009381, "calibration/unique_confidence_per_question": 0.022395833333333334, "calibration/unique_confidences": 8.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00902777777777779, "completions/max_length": 3458.2, "completions/max_terminated_length": 3458.2, "completions/mean_length": 638.6677124023438, "completions/mean_terminated_length": 644.5072265625, "completions/min_length": 0.0, "completions/min_terminated_length": 188.4, "epoch": 0.25240384615384615, "grad_norm": 0.0006219370989128947, "learning_rate": 2.5240384615384618e-06, "loss": -0.0084, "num_tokens": 205899021.0, "reward": 1.2294507503509522, "reward_std": 0.19787262380123138, "rewards/accuracy_reward": 0.6855902791023254, "rewards/brier_reward": 0.7831931471824646, "rewards/confidence_one_or_zero": 0.0001736111124046147, "rewards/format_reward": 0.9901041746139526, "rewards/mean_confidence_reward": 0.6871875047683715, "sampling/batch_mean_priority_error": 0.039179960880272646, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.9, "sampling/error_ema_max": 0.12885397672653198, "sampling/error_ema_mean": 0.004937084298580885, "sampling/priority_kl": 0.030000375211238862, "sampling/priority_scale": 1.1062577606644481, "sampling/prob_entropy": 10.278951454162598, "sampling/prob_max": 3.757406011573039e-05, "sampling/prob_min": 1.2466577754821628e-05, "sampling/prompt_draws_max": 3.0, "sampling/prompt_draws_mean": 0.24720000028610228, "sampling/prompt_draws_total": 7416.0, "sampling/seen_fraction": 0.23444666862487792, "sampling/unseen_fraction": 0.765553331375122, "signal/accuracy_reward/centered_abs_mean": 0.18909505307674407, "signal/accuracy_reward/group_std_mean": 0.251471620798111, "signal/accuracy_reward/group_zero_std_frac": 0.2777777761220932, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09454752653837203, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09454752653837203, "signal/advantage_abs_mean": 0.14458977580070495, "signal/advantage_pre_scale_abs_mean": 0.14458977580070495, "signal/advantage_pre_scale_std": 0.23422183096408844, "signal/advantage_std": 0.23422183096408844, "signal/brier_reward/centered_abs_mean": 0.09787606298923493, "signal/brier_reward/group_std_mean": 0.1316657304763794, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.048938031494617465, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.048938031494617465, "signal/confidence_one_or_zero/centered_abs_mean": 0.0003255208255723119, "signal/confidence_one_or_zero/group_std_mean": 0.0006831518840044737, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9972222208976745, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 3.255208369523643e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 3.255208369523643e-09, "signal/format_reward/centered_abs_mean": 0.017773437686264514, "signal/format_reward/group_std_mean": 0.03596483059227466, "signal/format_reward/group_zero_std_frac": 0.8444444417953492, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.008886718843132257, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.008886718843132257, "signal/mean_confidence_reward/centered_abs_mean": 0.06357530206441879, "signal/mean_confidence_reward/group_std_mean": 0.08628400415182114, "signal/mean_confidence_reward/group_zero_std_frac": 0.0, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.357530082823359e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.357530082823359e-07, "step": 105 }, { "calibration/aurc": 0.2731392392789653, "calibration/batch_distribution_entropy": 0.48010458764261943, "calibration/batch_entropy_100bins": 0.26175482708951114, "calibration/batch_entropy_10bins": 0.48010458764261943, "calibration/batch_entropy_50bins": 0.30813354729215314, "calibration/batch_uniqueness": 0.26838013652362525, "calibration/confidence_entropy": 0.597166460926026, "calibration/coverage@0%": 0.0026082463011314187, "calibration/coverage@1%": 0.0026082463011314187, "calibration/coverage@10%": 0.0026082463011314187, "calibration/coverage@15%": 0.0694489773716275, "calibration/coverage@20%": 0.2647040626734648, "calibration/coverage@25%": 0.5813071148825065, "calibration/coverage@30%": 0.6276680809399477, "calibration/coverage@5%": 0.0026082463011314187, "calibration/distribution_entropy_10": 0.48010458764261943, "calibration/distribution_entropy_100": 0.26175482708951114, "calibration/ece": 0.09699426452649675, "calibration/mean_confidence": 0.6960098487301523, "calibration/unique_confidence_per_question": 0.02239583333333333, "calibration/unique_confidences": 8.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01128472222222221, "completions/max_length": 3859.0, "completions/max_terminated_length": 3859.0, "completions/mean_length": 637.8580810546875, "completions/mean_terminated_length": 645.131982421875, "completions/min_length": 0.0, "completions/min_terminated_length": 177.0, "epoch": 0.2644230769230769, "grad_norm": 0.000706358696334064, "learning_rate": 2.6442307692307696e-06, "loss": -0.0113, "num_tokens": 216304106.0, "reward": 1.220960521697998, "reward_std": 0.19010037779808045, "rewards/accuracy_reward": 0.6734375, "rewards/brier_reward": 0.7804488062858581, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9880208253860474, "rewards/mean_confidence_reward": 0.694079864025116, "sampling/batch_mean_priority_error": 0.039559362733774556, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.8666666666666666, "sampling/error_ema_max": 0.12885397672653198, "sampling/error_ema_mean": 0.004981566872447729, "sampling/priority_kl": 0.029999980702996254, "sampling/priority_scale": 1.0800061345566063, "sampling/prob_entropy": 10.278951835632324, "sampling/prob_max": 3.771074407268316e-05, "sampling/prob_min": 1.2843752301705536e-05, "sampling/prompt_draws_max": 3.0, "sampling/prompt_draws_mean": 0.2592000007629395, "sampling/prompt_draws_total": 7776.0, "sampling/seen_fraction": 0.24499333202838897, "sampling/unseen_fraction": 0.755006667971611, "signal/accuracy_reward/centered_abs_mean": 0.18021918535232545, "signal/accuracy_reward/group_std_mean": 0.23629273772239684, "signal/accuracy_reward/group_zero_std_frac": 0.33333333730697634, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09010959267616273, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09010959267616273, "signal/advantage_abs_mean": 0.14149389117956163, "signal/advantage_pre_scale_abs_mean": 0.14149389117956163, "signal/advantage_pre_scale_std": 0.23208475708961487, "signal/advantage_std": 0.23208475708961487, "signal/brier_reward/centered_abs_mean": 0.09630044549703598, "signal/brier_reward/group_std_mean": 0.12676727920770645, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04815022274851799, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.04815022274851799, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.02072482630610466, "signal/format_reward/group_std_mean": 0.03876565992832184, "signal/format_reward/group_zero_std_frac": 0.8444444417953492, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01036241315305233, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01036241315305233, "signal/mean_confidence_reward/centered_abs_mean": 0.06128065511584282, "signal/mean_confidence_reward/group_std_mean": 0.08464662134647369, "signal/mean_confidence_reward/group_zero_std_frac": 0.0, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.128065251687076e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.128065251687076e-07, "step": 110 }, { "calibration/aurc": 0.23900958690948393, "calibration/batch_distribution_entropy": 0.4792917733999877, "calibration/batch_entropy_100bins": 0.25453673066305943, "calibration/batch_entropy_10bins": 0.4792917733999877, "calibration/batch_entropy_50bins": 0.29963652096675586, "calibration/batch_uniqueness": 0.23525715566464817, "calibration/confidence_entropy": 0.5841788415392271, "calibration/coverage@0%": 0.002645767716535433, "calibration/coverage@1%": 0.002645767716535433, "calibration/coverage@10%": 0.07197910104986877, "calibration/coverage@15%": 0.15427076771653545, "calibration/coverage@20%": 0.2620832677165354, "calibration/coverage@25%": 0.5331317247542449, "calibration/coverage@30%": 0.7069983914209116, "calibration/coverage@5%": 0.002645767716535433, "calibration/distribution_entropy_10": 0.4792917733999877, "calibration/distribution_entropy_100": 0.25453673066305943, "calibration/ece": 0.07616447667964926, "calibration/mean_confidence": 0.7118249107708302, "calibration/unique_confidence_per_question": 0.0234375, "calibration/unique_confidences": 9.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01649305555555556, "completions/max_length": 3940.0, "completions/max_terminated_length": 3940.0, "completions/mean_length": 656.9077270507812, "completions/mean_terminated_length": 668.1058471679687, "completions/min_length": 0.0, "completions/min_terminated_length": 174.6, "epoch": 0.2764423076923077, "grad_norm": 0.0010440150508657098, "learning_rate": 2.7644230769230775e-06, "loss": -0.0155, "num_tokens": 226953891.0, "reward": 1.2063098430633545, "reward_std": 0.20195430517196655, "rewards/accuracy_reward": 0.6577256917953491, "rewards/brier_reward": 0.7725014925003052, "rewards/confidence_one_or_zero": 8.680555620230735e-05, "rewards/format_reward": 0.9823784589767456, "rewards/mean_confidence_reward": 0.6949609279632568, "sampling/batch_mean_priority_error": 0.04175164310245379, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.8805555555555555, "sampling/error_ema_max": 0.12885397672653198, "sampling/error_ema_mean": 0.0050280562601983545, "sampling/priority_kl": 0.029999904334545135, "sampling/priority_scale": 1.0559982656966895, "sampling/prob_entropy": 10.278952598571777, "sampling/prob_max": 3.7847199564566836e-05, "sampling/prob_min": 1.3202533409639727e-05, "sampling/prompt_draws_max": 3.0, "sampling/prompt_draws_mean": 0.2712000012397766, "sampling/prompt_draws_total": 8136.0, "sampling/seen_fraction": 0.2555199980735779, "sampling/unseen_fraction": 0.7444800019264222, "signal/accuracy_reward/centered_abs_mean": 0.18530273735523223, "signal/accuracy_reward/group_std_mean": 0.24110294580459596, "signal/accuracy_reward/group_zero_std_frac": 0.325, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09265136867761611, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09265136867761611, "signal/advantage_abs_mean": 0.14929196536540984, "signal/advantage_pre_scale_abs_mean": 0.14929196536540984, "signal/advantage_pre_scale_std": 0.2474428117275238, "signal/advantage_std": 0.2474428117275238, "signal/brier_reward/centered_abs_mean": 0.10281380563974381, "signal/brier_reward/group_std_mean": 0.13706456124782562, "signal/brier_reward/group_zero_std_frac": 0.002777777798473835, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.051406902819871904, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.051406902819871904, "signal/confidence_one_or_zero/centered_abs_mean": 0.0001681857625953853, "signal/confidence_one_or_zero/group_std_mean": 0.0004910463467240334, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9972222208976745, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.6818574977151001e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.6818574977151001e-09, "signal/format_reward/centered_abs_mean": 0.02937825545668602, "signal/format_reward/group_std_mean": 0.05295464023947716, "signal/format_reward/group_zero_std_frac": 0.7916666746139527, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01468912772834301, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01468912772834301, "signal/mean_confidence_reward/centered_abs_mean": 0.06717475205659866, "signal/mean_confidence_reward/group_std_mean": 0.09251063913106919, "signal/mean_confidence_reward/group_zero_std_frac": 0.002777777798473835, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.717474661854794e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.717474661854794e-07, "step": 115 }, { "calibration/aurc": 0.2765707780359674, "calibration/batch_distribution_entropy": 0.46426059536033704, "calibration/batch_entropy_100bins": 0.24633244332746132, "calibration/batch_entropy_10bins": 0.46426059536033704, "calibration/batch_entropy_50bins": 0.2899785666595467, "calibration/batch_uniqueness": 0.2554472316323028, "calibration/confidence_entropy": 0.5880420811708887, "calibration/coverage@0%": 0.005281628929694076, "calibration/coverage@1%": 0.005281628929694076, "calibration/coverage@10%": 0.005281628929694076, "calibration/coverage@15%": 0.005281628929694076, "calibration/coverage@20%": 0.12280813811604578, "calibration/coverage@25%": 0.3352501998796019, "calibration/coverage@30%": 0.6500459979400643, "calibration/coverage@5%": 0.005281628929694076, "calibration/distribution_entropy_10": 0.46426059536033704, "calibration/distribution_entropy_100": 0.24633244332746132, "calibration/ece": 0.056203158003464426, "calibration/mean_confidence": 0.7093479335837578, "calibration/unique_confidence_per_question": 0.019270833333333334, "calibration/unique_confidences": 7.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012673611111111116, "completions/max_length": 3711.0, "completions/max_terminated_length": 3711.0, "completions/mean_length": 631.4669189453125, "completions/mean_terminated_length": 639.7166870117187, "completions/min_length": 0.0, "completions/min_terminated_length": 169.4, "epoch": 0.28846153846153844, "grad_norm": 0.0008303358335979283, "learning_rate": 2.8846153846153845e-06, "loss": -0.0093, "num_tokens": 237324006.0, "reward": 1.235554051399231, "reward_std": 0.18194632232189178, "rewards/accuracy_reward": 0.6936632037162781, "rewards/brier_reward": 0.7907119989395142, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.986718761920929, "rewards/mean_confidence_reward": 0.7019965291023255, "sampling/batch_mean_priority_error": 0.03951839709516186, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.8416666666666668, "sampling/error_ema_max": 0.12885397672653198, "sampling/error_ema_mean": 0.0050725327804684636, "sampling/priority_kl": 0.03000000901520252, "sampling/priority_scale": 1.0340607046615333, "sampling/prob_entropy": 10.278952407836915, "sampling/prob_max": 3.797858953475952e-05, "sampling/prob_min": 1.2279798283998389e-05, "sampling/prompt_draws_max": 4.0, "sampling/prompt_draws_mean": 0.28320000171661375, "sampling/prompt_draws_total": 8496.0, "sampling/seen_fraction": 0.26567999720573426, "sampling/unseen_fraction": 0.7343200027942658, "signal/accuracy_reward/centered_abs_mean": 0.168896484375, "signal/accuracy_reward/group_std_mean": 0.2248100906610489, "signal/accuracy_reward/group_zero_std_frac": 0.3555555522441864, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0844482421875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0844482421875, "signal/advantage_abs_mean": 0.1337295189499855, "signal/advantage_pre_scale_abs_mean": 0.1337295189499855, "signal/advantage_pre_scale_std": 0.2254096359014511, "signal/advantage_std": 0.2254096359014511, "signal/brier_reward/centered_abs_mean": 0.09312592148780822, "signal/brier_reward/group_std_mean": 0.12495446056127549, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04656296074390411, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.04656296074390411, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.01927625834941864, "signal/format_reward/group_std_mean": 0.036288988590240476, "signal/format_reward/group_zero_std_frac": 0.8527777791023254, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00963812917470932, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00963812917470932, "signal/mean_confidence_reward/centered_abs_mean": 0.05979058220982551, "signal/mean_confidence_reward/group_std_mean": 0.08202629685401916, "signal/mean_confidence_reward/group_zero_std_frac": 0.0, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.979058300908946e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.979058300908946e-07, "step": 120 }, { "calibration/aurc": 0.28248611122206685, "calibration/batch_distribution_entropy": 0.4817757350877705, "calibration/batch_entropy_100bins": 0.2525112913705817, "calibration/batch_entropy_10bins": 0.4817757350877705, "calibration/batch_entropy_50bins": 0.2972522066029843, "calibration/batch_uniqueness": 0.29516464341044957, "calibration/confidence_entropy": 0.5860149585009575, "calibration/coverage@0%": 0.002090139843888836, "calibration/coverage@1%": 0.002090139843888836, "calibration/coverage@10%": 0.002090139843888836, "calibration/coverage@15%": 0.06491736497477887, "calibration/coverage@20%": 0.33621901321455316, "calibration/coverage@25%": 0.4885750341569616, "calibration/coverage@30%": 0.4938106362512024, "calibration/coverage@5%": 0.002090139843888836, "calibration/distribution_entropy_10": 0.4817757350877705, "calibration/distribution_entropy_100": 0.2525112913705817, "calibration/ece": 0.10515600893545998, "calibration/mean_confidence": 0.707127596378001, "calibration/unique_confidence_per_question": 0.018229166666666668, "calibration/unique_confidences": 7.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01362847222222221, "completions/max_length": 3608.6, "completions/max_terminated_length": 3608.6, "completions/mean_length": 655.3791748046875, "completions/mean_terminated_length": 664.4575073242188, "completions/min_length": 0.0, "completions/min_terminated_length": 163.0, "epoch": 0.3004807692307692, "grad_norm": 0.0007634198991581798, "learning_rate": 3.0048076923076923e-06, "loss": -0.0105, "num_tokens": 248013366.0, "reward": 1.201941180229187, "reward_std": 0.20476475059986116, "rewards/accuracy_reward": 0.6467881917953491, "rewards/brier_reward": 0.771316409111023, "rewards/confidence_one_or_zero": 0.0001736111124046147, "rewards/format_reward": 0.9857638835906982, "rewards/mean_confidence_reward": 0.6920746684074401, "sampling/batch_mean_priority_error": 0.04121937903065245, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.8111111111111111, "sampling/error_ema_max": 0.12885397672653198, "sampling/error_ema_mean": 0.005118600558489561, "sampling/priority_kl": 0.029999775066971778, "sampling/priority_scale": 1.0137596725020557, "sampling/prob_entropy": 10.278952026367188, "sampling/prob_max": 3.8107689033495265e-05, "sampling/prob_min": 1.2597672321135178e-05, "sampling/prompt_draws_max": 4.0, "sampling/prompt_draws_mean": 0.29520000219345094, "sampling/prompt_draws_total": 8856.0, "sampling/seen_fraction": 0.2756599962711334, "sampling/unseen_fraction": 0.7243400037288665, "signal/accuracy_reward/centered_abs_mean": 0.2012641042470932, "signal/accuracy_reward/group_std_mean": 0.2568613201379776, "signal/accuracy_reward/group_zero_std_frac": 0.2972222179174423, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1006320521235466, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.1006320521235466, "signal/advantage_abs_mean": 0.1566687285900116, "signal/advantage_pre_scale_abs_mean": 0.1566687285900116, "signal/advantage_pre_scale_std": 0.24594251811504364, "signal/advantage_std": 0.24594251811504364, "signal/brier_reward/centered_abs_mean": 0.10521574020385742, "signal/brier_reward/group_std_mean": 0.13690231144428253, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.05260787010192871, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.05260787010192871, "signal/confidence_one_or_zero/centered_abs_mean": 0.0003255208255723119, "signal/confidence_one_or_zero/group_std_mean": 0.0006831518840044737, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9972222208976745, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 3.255208369523643e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 3.255208369523643e-09, "signal/format_reward/centered_abs_mean": 0.022851562686264516, "signal/format_reward/group_std_mean": 0.039858097583055495, "signal/format_reward/group_zero_std_frac": 0.8472222208976745, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.011425781343132258, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.011425781343132258, "signal/mean_confidence_reward/centered_abs_mean": 0.06317572891712189, "signal/mean_confidence_reward/group_std_mean": 0.08579769432544708, "signal/mean_confidence_reward/group_zero_std_frac": 0.0, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.317572797343018e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.317572797343018e-07, "step": 125 }, { "calibration/aurc": 0.27523049109574826, "calibration/batch_distribution_entropy": 0.4980071222686771, "calibration/batch_entropy_100bins": 0.2809631885026639, "calibration/batch_entropy_10bins": 0.4980071222686771, "calibration/batch_entropy_50bins": 0.3307453195590327, "calibration/batch_uniqueness": 0.36682188726614934, "calibration/confidence_entropy": 0.6059415049134447, "calibration/coverage@0%": 0.0010610079575596816, "calibration/coverage@1%": 0.0010610079575596816, "calibration/coverage@10%": 0.11211363953650703, "calibration/coverage@15%": 0.1656903532039648, "calibration/coverage@20%": 0.2866758129509338, "calibration/coverage@25%": 0.41138054560484977, "calibration/coverage@30%": 0.6329594929732709, "calibration/coverage@5%": 0.0010610079575596816, "calibration/distribution_entropy_10": 0.4980071222686771, "calibration/distribution_entropy_100": 0.2809631885026639, "calibration/ece": 0.10925301852963129, "calibration/mean_confidence": 0.6789147319741574, "calibration/unique_confidence_per_question": 0.021354166666666667, "calibration/unique_confidences": 8.2, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010937500000000022, "completions/max_length": 3922.2, "completions/max_terminated_length": 3922.2, "completions/mean_length": 650.4537353515625, "completions/mean_terminated_length": 657.6193115234375, "completions/min_length": 0.0, "completions/min_terminated_length": 188.6, "epoch": 0.3125, "grad_norm": 0.0013687816681340337, "learning_rate": 3.125e-06, "loss": -0.0093, "num_tokens": 258602529.0, "reward": 1.2259597539901734, "reward_std": 0.17786626517772675, "rewards/accuracy_reward": 0.6735243082046509, "rewards/brier_reward": 0.7898400783538818, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9885416626930237, "rewards/mean_confidence_reward": 0.6681119918823242, "sampling/batch_mean_priority_error": 0.033332646520728156, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.8083333333333332, "sampling/error_ema_max": 0.12885397672653198, "sampling/error_ema_mean": 0.0051580403000116345, "sampling/priority_kl": 0.029999729618430136, "sampling/priority_scale": 0.9950135349761695, "sampling/prob_entropy": 10.278952598571777, "sampling/prob_max": 3.823342267423868e-05, "sampling/prob_min": 1.29005729831988e-05, "sampling/prompt_draws_max": 4.0, "sampling/prompt_draws_mean": 0.3072000026702881, "sampling/prompt_draws_total": 9216.0, "sampling/seen_fraction": 0.2853800058364868, "sampling/unseen_fraction": 0.7146199941635132, "signal/accuracy_reward/centered_abs_mean": 0.171142578125, "signal/accuracy_reward/group_std_mean": 0.22980001866817473, "signal/accuracy_reward/group_zero_std_frac": 0.33333333730697634, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0855712890625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0855712890625, "signal/advantage_abs_mean": 0.12813933938741684, "signal/advantage_pre_scale_abs_mean": 0.12813933938741684, "signal/advantage_pre_scale_std": 0.21815860867500306, "signal/advantage_std": 0.21815860867500306, "signal/brier_reward/centered_abs_mean": 0.08448321223258973, "signal/brier_reward/group_std_mean": 0.11474492251873017, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.042241606116294864, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.042241606116294864, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.02050781212747097, "signal/format_reward/group_std_mean": 0.0391859769821167, "signal/format_reward/group_zero_std_frac": 0.8416666746139526, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.010253906063735486, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.010253906063735486, "signal/mean_confidence_reward/centered_abs_mean": 0.06431071013212204, "signal/mean_confidence_reward/group_std_mean": 0.08673321157693863, "signal/mean_confidence_reward/group_zero_std_frac": 0.0, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.431070801227179e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.431070801227179e-07, "step": 130 }, { "calibration/aurc": 0.2949663717931138, "calibration/batch_distribution_entropy": 0.4976870100283005, "calibration/batch_entropy_100bins": 0.3003926554336508, "calibration/batch_entropy_10bins": 0.4976870100283005, "calibration/batch_entropy_50bins": 0.35361737366404944, "calibration/batch_uniqueness": 0.4320600368780066, "calibration/confidence_entropy": 0.6141245140457932, "calibration/coverage@0%": 0.0015665796344647518, "calibration/coverage@1%": 0.0015665796344647518, "calibration/coverage@10%": 0.0015665796344647518, "calibration/coverage@15%": 0.0015665796344647518, "calibration/coverage@20%": 0.19849162231964446, "calibration/coverage@25%": 0.2945751732334826, "calibration/coverage@30%": 0.6622418702363284, "calibration/coverage@5%": 0.0015665796344647518, "calibration/distribution_entropy_10": 0.4976870100283005, "calibration/distribution_entropy_100": 0.3003926554336508, "calibration/ece": 0.12498686483284764, "calibration/mean_confidence": 0.6626829831819878, "calibration/unique_confidence_per_question": 0.0171875, "calibration/unique_confidences": 6.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009722222222222254, "completions/max_length": 3440.0, "completions/max_terminated_length": 3440.0, "completions/mean_length": 674.4981811523437, "completions/mean_terminated_length": 681.1444458007812, "completions/min_length": 0.0, "completions/min_terminated_length": 188.0, "epoch": 0.3245192307692308, "grad_norm": 0.002276240848004818, "learning_rate": 3.245192307692308e-06, "loss": -0.0085, "num_tokens": 269462412.0, "reward": 1.2220972537994386, "reward_std": 0.17137631475925447, "rewards/accuracy_reward": 0.6633680582046508, "rewards/brier_reward": 0.7907957911491394, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9900173544883728, "rewards/mean_confidence_reward": 0.659448790550232, "sampling/batch_mean_priority_error": 0.03146580492743215, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.8055555555555556, "sampling/error_ema_max": 0.12885397672653198, "sampling/error_ema_mean": 0.005192387942224741, "sampling/priority_kl": 0.030000027641654013, "sampling/priority_scale": 0.977628904604353, "sampling/prob_entropy": 10.278952407836915, "sampling/prob_max": 3.836079340544529e-05, "sampling/prob_min": 1.3191566358727868e-05, "sampling/prompt_draws_max": 4.0, "sampling/prompt_draws_mean": 0.31919999718666076, "sampling/prompt_draws_total": 9576.0, "sampling/seen_fraction": 0.2951733350753784, "sampling/unseen_fraction": 0.7048266649246215, "signal/accuracy_reward/centered_abs_mean": 0.16806640625, "signal/accuracy_reward/group_std_mean": 0.2234184443950653, "signal/accuracy_reward/group_zero_std_frac": 0.3638888895511627, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.084033203125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.084033203125, "signal/advantage_abs_mean": 0.12492303550243378, "signal/advantage_pre_scale_abs_mean": 0.12492303550243378, "signal/advantage_pre_scale_std": 0.2111401379108429, "signal/advantage_std": 0.2111401379108429, "signal/brier_reward/centered_abs_mean": 0.08217985183000565, "signal/brier_reward/group_std_mean": 0.11212488561868668, "signal/brier_reward/group_zero_std_frac": 0.002777777798473835, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04108992591500282, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.04108992591500282, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.01755099855363369, "signal/format_reward/group_std_mean": 0.035760215297341344, "signal/format_reward/group_zero_std_frac": 0.8416666746139526, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.008775499276816845, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.008775499276816845, "signal/mean_confidence_reward/centered_abs_mean": 0.06346814930438996, "signal/mean_confidence_reward/group_std_mean": 0.08517944514751434, "signal/mean_confidence_reward/group_zero_std_frac": 0.002777777798473835, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.346814870994422e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.346814870994422e-07, "step": 135 }, { "calibration/aurc": 0.161821302991387, "calibration/batch_distribution_entropy": 0.5380996745311886, "calibration/batch_entropy_100bins": 0.3263382773335613, "calibration/batch_entropy_10bins": 0.5380996745311886, "calibration/batch_entropy_50bins": 0.38416013996797876, "calibration/batch_uniqueness": 0.48281429749503796, "calibration/confidence_entropy": 0.5990824174049726, "calibration/coverage@0%": 0.0015693711521547933, "calibration/coverage@1%": 0.0015693711521547933, "calibration/coverage@10%": 0.27670903441669037, "calibration/coverage@15%": 0.3979907857535888, "calibration/coverage@20%": 0.7749324795283459, "calibration/coverage@25%": 0.8180112451254938, "calibration/coverage@30%": 0.9941868298433729, "calibration/coverage@5%": 0.060392900563919494, "calibration/distribution_entropy_10": 0.5380996745311886, "calibration/distribution_entropy_100": 0.3263382773335613, "calibration/ece": 0.08859721584863484, "calibration/mean_confidence": 0.6633440478453676, "calibration/unique_confidence_per_question": 0.02447916666666667, "calibration/unique_confidences": 9.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013541666666666674, "completions/max_length": 3597.4, "completions/max_terminated_length": 3597.4, "completions/mean_length": 650.0008911132812, "completions/mean_terminated_length": 658.90849609375, "completions/min_length": 0.0, "completions/min_terminated_length": 180.6, "epoch": 0.33653846153846156, "grad_norm": 0.000727808685041964, "learning_rate": 3.365384615384616e-06, "loss": -0.013, "num_tokens": 280008822.0, "reward": 1.2597820997238158, "reward_std": 0.1712968796491623, "rewards/accuracy_reward": 0.7206597208976746, "rewards/brier_reward": 0.8126063346862793, "rewards/confidence_one_or_zero": 0.0003472222248092294, "rewards/format_reward": 0.9862847208976746, "rewards/mean_confidence_reward": 0.6640309572219849, "sampling/batch_mean_priority_error": 0.031656787110785164, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.8222222222222223, "sampling/error_ema_max": 0.12885397672653198, "sampling/error_ema_mean": 0.005227043479681015, "sampling/priority_kl": 0.029999838396906852, "sampling/priority_scale": 0.961505502439104, "sampling/prob_entropy": 10.27895221710205, "sampling/prob_max": 3.8488073914777485e-05, "sampling/prob_min": 1.3470378689817153e-05, "sampling/prompt_draws_max": 4.0, "sampling/prompt_draws_mean": 0.3312000036239624, "sampling/prompt_draws_total": 9936.0, "sampling/seen_fraction": 0.3049199998378754, "sampling/unseen_fraction": 0.6950800001621247, "signal/accuracy_reward/centered_abs_mean": 0.1687391519546509, "signal/accuracy_reward/group_std_mean": 0.2210682988166809, "signal/accuracy_reward/group_zero_std_frac": 0.37777777314186095, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08436957597732545, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08436957597732545, "signal/advantage_abs_mean": 0.12814967036247255, "signal/advantage_pre_scale_abs_mean": 0.12814967036247255, "signal/advantage_pre_scale_std": 0.21529237926006317, "signal/advantage_std": 0.21529237926006317, "signal/brier_reward/centered_abs_mean": 0.08760491609573365, "signal/brier_reward/group_std_mean": 0.1167238175868988, "signal/brier_reward/group_zero_std_frac": 0.002777777798473835, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.043802458047866824, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.043802458047866824, "signal/confidence_one_or_zero/centered_abs_mean": 0.0006727430503815412, "signal/confidence_one_or_zero/group_std_mean": 0.0019641853868961334, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9888888835906983, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 6.7274299908604006e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 6.7274299908604006e-09, "signal/format_reward/centered_abs_mean": 0.020887586660683154, "signal/format_reward/group_std_mean": 0.03523364253342152, "signal/format_reward/group_zero_std_frac": 0.8638888835906983, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.010443793330341577, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.010443793330341577, "signal/mean_confidence_reward/centered_abs_mean": 0.07050719410181046, "signal/mean_confidence_reward/group_std_mean": 0.09147275984287262, "signal/mean_confidence_reward/group_zero_std_frac": 0.00555555559694767, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 7.050719091239443e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 7.050719091239443e-07, "step": 140 }, { "calibration/aurc": 0.2002019075525349, "calibration/batch_distribution_entropy": 0.5122078027614163, "calibration/batch_entropy_100bins": 0.29976493766985196, "calibration/batch_entropy_10bins": 0.5122078027614163, "calibration/batch_entropy_50bins": 0.35287843446889366, "calibration/batch_uniqueness": 0.4097952308753282, "calibration/confidence_entropy": 0.590766473840011, "calibration/coverage@0%": 0.002086053089643168, "calibration/coverage@1%": 0.002086053089643168, "calibration/coverage@10%": 0.1858980635335074, "calibration/coverage@15%": 0.5017274400600803, "calibration/coverage@20%": 0.7080111738117296, "calibration/coverage@25%": 0.8, "calibration/coverage@30%": 0.8, "calibration/coverage@5%": 0.002086053089643168, "calibration/distribution_entropy_10": 0.5122078027614163, "calibration/distribution_entropy_100": 0.29976493766985196, "calibration/ece": 0.10117033702167381, "calibration/mean_confidence": 0.6790366267827563, "calibration/unique_confidence_per_question": 0.019791666666666666, "calibration/unique_confidences": 7.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011805555555555559, "completions/max_length": 3492.0, "completions/max_terminated_length": 3492.0, "completions/mean_length": 636.8101684570313, "completions/mean_terminated_length": 644.3724243164063, "completions/min_length": 0.0, "completions/min_terminated_length": 178.2, "epoch": 0.3485576923076923, "grad_norm": 0.000746583566069603, "learning_rate": 3.4855769230769233e-06, "loss": -0.0119, "num_tokens": 290450027.0, "reward": 1.236671781539917, "reward_std": 0.1658506065607071, "rewards/accuracy_reward": 0.682031261920929, "rewards/brier_reward": 0.8031913042068481, "rewards/confidence_one_or_zero": 8.680555620230735e-05, "rewards/format_reward": 0.9881076335906982, "rewards/mean_confidence_reward": 0.6686701536178589, "sampling/batch_mean_priority_error": 0.03593021038001605, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.8111111111111111, "sampling/error_ema_max": 0.12885397672653198, "sampling/error_ema_mean": 0.0052638577297329904, "sampling/priority_kl": 0.030000052228569985, "sampling/priority_scale": 0.9464996993308887, "sampling/prob_entropy": 10.278951835632324, "sampling/prob_max": 3.861593140754849e-05, "sampling/prob_min": 1.3738372217630967e-05, "sampling/prompt_draws_max": 4.0, "sampling/prompt_draws_mean": 0.3431999981403351, "sampling/prompt_draws_total": 10296.0, "sampling/seen_fraction": 0.31465333700180054, "sampling/unseen_fraction": 0.6853466629981995, "signal/accuracy_reward/centered_abs_mean": 0.16121419370174409, "signal/accuracy_reward/group_std_mean": 0.2107767105102539, "signal/accuracy_reward/group_zero_std_frac": 0.4055555582046509, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08060709685087204, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08060709685087204, "signal/advantage_abs_mean": 0.12273389399051667, "signal/advantage_pre_scale_abs_mean": 0.12273389399051667, "signal/advantage_pre_scale_std": 0.211455237865448, "signal/advantage_std": 0.211455237865448, "signal/brier_reward/centered_abs_mean": 0.08627397716045379, "signal/brier_reward/group_std_mean": 0.11665287166833878, "signal/brier_reward/group_zero_std_frac": 0.008333333395421505, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.043136988580226895, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.043136988580226895, "signal/confidence_one_or_zero/centered_abs_mean": 0.0001681857625953853, "signal/confidence_one_or_zero/group_std_mean": 0.0004910463467240334, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9972222208976745, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.6818574977151001e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.6818574977151001e-09, "signal/format_reward/centered_abs_mean": 0.02012261264026165, "signal/format_reward/group_std_mean": 0.03786379247903824, "signal/format_reward/group_zero_std_frac": 0.8444444417953492, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.010061306320130825, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.010061306320130825, "signal/mean_confidence_reward/centered_abs_mean": 0.06955652832984924, "signal/mean_confidence_reward/group_std_mean": 0.09108842760324479, "signal/mean_confidence_reward/group_zero_std_frac": 0.008333333395421505, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.955652793294576e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.955652793294576e-07, "step": 145 }, { "calibration/aurc": 0.22469899536459623, "calibration/batch_distribution_entropy": 0.49352183964938645, "calibration/batch_entropy_100bins": 0.2907041538671515, "calibration/batch_entropy_10bins": 0.49352183964938645, "calibration/batch_entropy_50bins": 0.3422122263786084, "calibration/batch_uniqueness": 0.37005133120849526, "calibration/confidence_entropy": 0.58310095568642, "calibration/coverage@0%": 0.001045767716535433, "calibration/coverage@1%": 0.001045767716535433, "calibration/coverage@10%": 0.1145874343832021, "calibration/coverage@15%": 0.28893460881481636, "calibration/coverage@20%": 0.4735757592549591, "calibration/coverage@25%": 0.6433276057980674, "calibration/coverage@30%": 0.7105192068479361, "calibration/coverage@5%": 0.001045767716535433, "calibration/distribution_entropy_10": 0.49352183964938645, "calibration/distribution_entropy_100": 0.2907041538671515, "calibration/ece": 0.0853479206156124, "calibration/mean_confidence": 0.6878991824983854, "calibration/unique_confidence_per_question": 0.019270833333333334, "calibration/unique_confidences": 7.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015885416666666652, "completions/max_length": 3841.4, "completions/max_terminated_length": 3841.4, "completions/mean_length": 671.563720703125, "completions/mean_terminated_length": 682.4291015625, "completions/min_length": 0.0, "completions/min_terminated_length": 201.6, "epoch": 0.3605769230769231, "grad_norm": 0.0005632630200125277, "learning_rate": 3.605769230769231e-06, "loss": -0.0151, "num_tokens": 301300905.0, "reward": 1.2439425945281983, "reward_std": 0.17063007950782777, "rewards/accuracy_reward": 0.6962673544883728, "rewards/brier_reward": 0.8076630473136902, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9839409828186035, "rewards/mean_confidence_reward": 0.685508668422699, "sampling/batch_mean_priority_error": 0.03787541302960014, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.7638888888888888, "sampling/error_ema_max": 0.12885397672653198, "sampling/error_ema_mean": 0.00530406478792429, "sampling/priority_kl": 0.03000020757317543, "sampling/priority_scale": 0.9324293434852734, "sampling/prob_entropy": 10.278951263427734, "sampling/prob_max": 3.873952809954062e-05, "sampling/prob_min": 1.3995698463986627e-05, "sampling/prompt_draws_max": 4.0, "sampling/prompt_draws_mean": 0.35520000457763673, "sampling/prompt_draws_total": 10656.0, "sampling/seen_fraction": 0.32404000163078306, "sampling/unseen_fraction": 0.6759599983692169, "signal/accuracy_reward/centered_abs_mean": 0.14857313334941863, "signal/accuracy_reward/group_std_mean": 0.20339624285697938, "signal/accuracy_reward/group_zero_std_frac": 0.3944444477558136, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07428656667470931, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07428656667470931, "signal/advantage_abs_mean": 0.12221142053604125, "signal/advantage_pre_scale_abs_mean": 0.12221142053604125, "signal/advantage_pre_scale_std": 0.21580576300621032, "signal/advantage_std": 0.21580576300621032, "signal/brier_reward/centered_abs_mean": 0.09183170050382614, "signal/brier_reward/group_std_mean": 0.1252654179930687, "signal/brier_reward/group_zero_std_frac": 0.03611111212521791, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04591585025191307, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.04591585025191307, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.02568901889026165, "signal/format_reward/group_std_mean": 0.04536781832575798, "signal/format_reward/group_zero_std_frac": 0.819444453716278, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.012844509445130825, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.012844509445130825, "signal/mean_confidence_reward/centered_abs_mean": 0.07101247161626816, "signal/mean_confidence_reward/group_std_mean": 0.09455030411481857, "signal/mean_confidence_reward/group_zero_std_frac": 0.04722222331911326, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 7.101247319951654e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 7.101247319951654e-07, "step": 150 }, { "epoch": 0.3605769230769231, "eval_calibration/aurc": 0.21648289229710788, "eval_calibration/batch_distribution_entropy": 0.48338692999776756, "eval_calibration/batch_entropy_100bins": 0.27455615561797053, "eval_calibration/batch_entropy_10bins": 0.48338692999776756, "eval_calibration/batch_entropy_50bins": 0.32320306411209526, "eval_calibration/batch_uniqueness": 0.2413918376060083, "eval_calibration/confidence_entropy": 0.5636802490436134, "eval_calibration/coverage@0%": 0.0, "eval_calibration/coverage@1%": 0.0, "eval_calibration/coverage@10%": 0.0, "eval_calibration/coverage@15%": 0.0, "eval_calibration/coverage@20%": 0.5665198237885463, "eval_calibration/coverage@25%": 0.7541850220264317, "eval_calibration/coverage@30%": 0.8176211453744493, "eval_calibration/coverage@5%": 0.0, "eval_calibration/distribution_entropy_10": 0.48338692999776756, "eval_calibration/distribution_entropy_100": 0.27455615561797053, "eval_calibration/ece": 0.05352422907488988, "eval_calibration/mean_confidence": 0.7074449339207047, "eval_calibration/unique_confidence_per_question": 0.008680555555555556, "eval_calibration/unique_confidences": 10, "eval_completions/clipped_ratio": 0.013020833333333334, "eval_completions/max_length": 2320.5, "eval_completions/max_terminated_length": 2320.5, "eval_completions/mean_length": 684.08056640625, "eval_completions/mean_terminated_length": 693.1365661621094, "eval_completions/min_length": 54.666666666666664, "eval_completions/min_terminated_length": 242.16666666666666, "eval_loss": 0.0, "eval_num_tokens": 301300905.0, "eval_reward": 1.227612296740214, "eval_reward_std": 0.34397614002227783, "eval_rewards/accuracy_reward": 0.6710069378217062, "eval_rewards/brier_reward": 0.7989605267842611, "eval_rewards/confidence_one_or_zero": 0.0, "eval_rewards/format_reward": 0.9852430522441864, "eval_rewards/mean_confidence_reward": 0.6970052123069763, "eval_runtime": 210.4163, "eval_samples_per_second": 4.752, "eval_signal/accuracy_reward/centered_abs_mean": 0.4160698751608531, "eval_signal/accuracy_reward/group_std_mean": 0.4616414209206899, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.20803493758042654, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.20803493758042654, "eval_signal/advantage_abs_mean": 0.2960173139969508, "eval_signal/advantage_pre_scale_abs_mean": 0.2960173139969508, "eval_signal/advantage_pre_scale_std": 0.34291206300258636, "eval_signal/advantage_std": 0.34291206300258636, "eval_signal/brier_reward/centered_abs_mean": 0.17483383417129517, "eval_signal/brier_reward/group_std_mean": 0.22024944176276526, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.08741691708564758, "eval_signal/brier_reward/weight": 0.5, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.08741691708564758, "eval_signal/confidence_one_or_zero/centered_abs_mean": 0.0, "eval_signal/confidence_one_or_zero/group_std_mean": 0.0, "eval_signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "eval_signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "eval_signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/centered_abs_mean": 0.02794053762530287, "eval_signal/format_reward/group_std_mean": 0.06800450353572766, "eval_signal/format_reward/group_zero_std_frac": 0.6666666865348816, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.013970268812651435, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.013970268812651435, "eval_signal/mean_confidence_reward/centered_abs_mean": 0.11518825963139534, "eval_signal/mean_confidence_reward/group_std_mean": 0.15239649017651877, "eval_signal/mean_confidence_reward/group_zero_std_frac": 0.0, "eval_signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 1.1518825620745095e-06, "eval_signal/mean_confidence_reward/weight": 9.999999747378752e-06, "eval_signal/mean_confidence_reward/weighted_centered_abs_mean": 1.1518825620745095e-06, "eval_steps_per_second": 0.029, "step": 150 }, { "epoch": 0.3605769230769231, "step": 150, "train_probe_calibration/aurc": 0.20118106887974196, "train_probe_calibration/batch_distribution_entropy": 0.48299539334492897, "train_probe_calibration/batch_entropy_100bins": 0.2740406292224008, "train_probe_calibration/batch_entropy_10bins": 0.48299539334492897, "train_probe_calibration/batch_entropy_50bins": 0.32259619478037777, "train_probe_calibration/batch_uniqueness": 0.25400156219004166, "train_probe_calibration/confidence_entropy": 0.5653509032070053, "train_probe_calibration/coverage@0%": 0.0, "train_probe_calibration/coverage@1%": 0.0, "train_probe_calibration/coverage@10%": 0.0, "train_probe_calibration/coverage@15%": 0.006161971830985915, "train_probe_calibration/coverage@20%": 0.5607394366197183, "train_probe_calibration/coverage@25%": 0.8116197183098591, "train_probe_calibration/coverage@30%": 0.9964788732394366, "train_probe_calibration/coverage@5%": 0.0, "train_probe_calibration/distribution_entropy_10": 0.48299539334492897, "train_probe_calibration/distribution_entropy_100": 0.2740406292224008, "train_probe_calibration/ece": 0.04286971830985919, "train_probe_calibration/mean_confidence": 0.7078345070422535, "train_probe_calibration/unique_confidence_per_question": 0.008680555555555556, "train_probe_calibration/unique_confidences": 10, "train_probe_completions/clipped_ratio": 0.014583333333333356, "train_probe_completions/max_length": 2331.5, "train_probe_completions/max_terminated_length": 2331.5, "train_probe_completions/mean_length": 686.4166870117188, "train_probe_completions/mean_terminated_length": 696.6106872558594, "train_probe_completions/min_length": 0.0, "train_probe_completions/min_terminated_length": 215.83333333333334, "train_probe_loss": 0.0, "train_probe_num_tokens": 301300905.0, "train_probe_reward": 1.2423616449038188, "train_probe_reward_std": 0.3410983880360921, "train_probe_rewards/accuracy_reward": 0.6901041666666666, "train_probe_rewards/brier_reward": 0.8084939221541086, "train_probe_rewards/confidence_one_or_zero": 0.0, "train_probe_rewards/format_reward": 0.9861111243565878, "train_probe_rewards/mean_confidence_reward": 0.6980034708976746, "train_probe_runtime": 212.6957, "train_probe_samples_per_second": 4.702, "train_probe_signal/accuracy_reward/centered_abs_mean": 0.4138997445503871, "train_probe_signal/accuracy_reward/group_std_mean": 0.46029503146807355, "train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0, "train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.20694987227519354, "train_probe_signal/accuracy_reward/weight": 0.5, "train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.20694987227519354, "train_probe_signal/advantage_abs_mean": 0.2931293447812398, "train_probe_signal/advantage_pre_scale_abs_mean": 0.2931293447812398, "train_probe_signal/advantage_pre_scale_std": 0.33921169737974805, "train_probe_signal/advantage_std": 0.33921169737974805, "train_probe_signal/brier_reward/centered_abs_mean": 0.16598876068989435, "train_probe_signal/brier_reward/group_std_mean": 0.2144825185338656, "train_probe_signal/brier_reward/group_zero_std_frac": 0.0, "train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.08299438034494717, "train_probe_signal/brier_reward/weight": 0.5, "train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.08299438034494717, "train_probe_signal/confidence_one_or_zero/centered_abs_mean": 0.0, "train_probe_signal/confidence_one_or_zero/group_std_mean": 0.0, "train_probe_signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "train_probe_signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "train_probe_signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "train_probe_signal/format_reward/centered_abs_mean": 0.026258680193374555, "train_probe_signal/format_reward/group_std_mean": 0.06309404006848733, "train_probe_signal/format_reward/group_zero_std_frac": 0.6944444676240286, "train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.013129340096687278, "train_probe_signal/format_reward/weight": 0.5, "train_probe_signal/format_reward/weighted_centered_abs_mean": 0.013129340096687278, "train_probe_signal/mean_confidence_reward/centered_abs_mean": 0.1145426481962204, "train_probe_signal/mean_confidence_reward/group_std_mean": 0.14749648794531822, "train_probe_signal/mean_confidence_reward/group_zero_std_frac": 0.0, "train_probe_signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 1.1454264381427492e-06, "train_probe_signal/mean_confidence_reward/weight": 9.999999747378752e-06, "train_probe_signal/mean_confidence_reward/weighted_centered_abs_mean": 1.1454264381427492e-06, "train_probe_steps_per_second": 0.028 }, { "calibration/aurc": 0.22965670297129837, "calibration/batch_distribution_entropy": 0.4561284084250222, "calibration/batch_entropy_100bins": 0.25662249926372777, "calibration/batch_entropy_10bins": 0.4561284084250222, "calibration/batch_entropy_50bins": 0.3020918540160094, "calibration/batch_uniqueness": 0.16051952690981258, "calibration/confidence_entropy": 0.558045726461945, "calibration/coverage@0%": 0.002655612171382654, "calibration/coverage@1%": 0.002655612171382654, "calibration/coverage@10%": 0.002655612171382654, "calibration/coverage@15%": 0.002655612171382654, "calibration/coverage@20%": 0.43610356000332223, "calibration/coverage@25%": 0.7411703971289201, "calibration/coverage@30%": 0.9039506908389366, "calibration/coverage@5%": 0.002655612171382654, "calibration/distribution_entropy_10": 0.4561284084250222, "calibration/distribution_entropy_100": 0.25662249926372777, "calibration/ece": 0.07278548683083944, "calibration/mean_confidence": 0.7137022027408687, "calibration/unique_confidence_per_question": 0.021875, "calibration/unique_confidences": 8.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013975694444444464, "completions/max_length": 3620.6, "completions/max_terminated_length": 3620.6, "completions/mean_length": 692.3322021484375, "completions/mean_terminated_length": 702.1637329101562, "completions/min_length": 0.0, "completions/min_terminated_length": 187.2, "epoch": 0.37259615384615385, "grad_norm": 0.0006764737772755325, "learning_rate": 3.725961538461539e-06, "loss": -0.0135, "num_tokens": 312347452.0, "reward": 1.2194071769714356, "reward_std": 0.17755380570888518, "rewards/accuracy_reward": 0.6553819417953491, "rewards/brier_reward": 0.7974809050559998, "rewards/confidence_one_or_zero": 0.00026041667442768814, "rewards/format_reward": 0.985937488079071, "rewards/mean_confidence_reward": 0.6956162929534913, "sampling/batch_mean_priority_error": 0.044587768701128454, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.7972222222222223, "sampling/error_ema_max": 0.12885397672653198, "sampling/error_ema_mean": 0.005351596139371395, "sampling/priority_kl": 0.02999967634677887, "sampling/priority_scale": 0.9192980109946802, "sampling/prob_entropy": 10.278952026367188, "sampling/prob_max": 3.886347622028552e-05, "sampling/prob_min": 1.4243225450627506e-05, "sampling/prompt_draws_max": 4.0, "sampling/prompt_draws_mean": 0.3671999990940094, "sampling/prompt_draws_total": 11016.0, "sampling/seen_fraction": 0.33339332938194277, "sampling/unseen_fraction": 0.6666066706180572, "signal/accuracy_reward/centered_abs_mean": 0.16404079794883727, "signal/accuracy_reward/group_std_mean": 0.2167521059513092, "signal/accuracy_reward/group_zero_std_frac": 0.38611111640930174, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08202039897441864, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08202039897441864, "signal/advantage_abs_mean": 0.12833702713251113, "signal/advantage_pre_scale_abs_mean": 0.12833702713251113, "signal/advantage_pre_scale_std": 0.221464666724205, "signal/advantage_std": 0.221464666724205, "signal/brier_reward/centered_abs_mean": 0.09387256652116775, "signal/brier_reward/group_std_mean": 0.12882587313652039, "signal/brier_reward/group_zero_std_frac": 0.04722222331911326, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.046936283260583876, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.046936283260583876, "signal/confidence_one_or_zero/centered_abs_mean": 0.00047200522385537624, "signal/confidence_one_or_zero/group_std_mean": 0.0008226238191127777, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9972222208976745, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 4.7200515496115255e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 4.7200515496115255e-09, "signal/format_reward/centered_abs_mean": 0.023242188058793543, "signal/format_reward/group_std_mean": 0.04568904340267181, "signal/format_reward/group_zero_std_frac": 0.8055555701255799, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.011621094029396772, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.011621094029396772, "signal/mean_confidence_reward/centered_abs_mean": 0.06486843228340149, "signal/mean_confidence_reward/group_std_mean": 0.09012808799743652, "signal/mean_confidence_reward/group_zero_std_frac": 0.06111111231148243, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.486842949016137e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.486842949016137e-07, "step": 155 }, { "calibration/aurc": 0.23205115891678957, "calibration/batch_distribution_entropy": 0.43420742788405137, "calibration/batch_entropy_100bins": 0.2390598214395338, "calibration/batch_entropy_10bins": 0.43420742788405137, "calibration/batch_entropy_50bins": 0.2814173538431148, "calibration/batch_uniqueness": 0.10195991736118337, "calibration/confidence_entropy": 0.552720533534938, "calibration/coverage@0%": 0.004207814359698442, "calibration/coverage@1%": 0.004207814359698442, "calibration/coverage@10%": 0.13457430650629532, "calibration/coverage@15%": 0.16651147928116442, "calibration/coverage@20%": 0.407872735825667, "calibration/coverage@25%": 0.5434927885961156, "calibration/coverage@30%": 0.7420298591455085, "calibration/coverage@5%": 0.004207814359698442, "calibration/distribution_entropy_10": 0.43420742788405137, "calibration/distribution_entropy_100": 0.2390598214395338, "calibration/ece": 0.10113857306348302, "calibration/mean_confidence": 0.7223825960740257, "calibration/unique_confidence_per_question": 0.018229166666666668, "calibration/unique_confidences": 7.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008767361111111116, "completions/max_length": 2959.4, "completions/max_terminated_length": 2959.4, "completions/mean_length": 677.7904663085938, "completions/mean_terminated_length": 683.8215942382812, "completions/min_length": 0.0, "completions/min_terminated_length": 197.6, "epoch": 0.38461538461538464, "grad_norm": 0.0006002213340252638, "learning_rate": 3.846153846153847e-06, "loss": -0.0078, "num_tokens": 323238542.0, "reward": 1.2424018144607545, "reward_std": 0.1606945961713791, "rewards/accuracy_reward": 0.6875867962837219, "rewards/brier_reward": 0.8059702754020691, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9912326455116272, "rewards/mean_confidence_reward": 0.6932335138320923, "sampling/batch_mean_priority_error": 0.04362556407230752, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.8138888888888889, "sampling/error_ema_max": 0.12885397672653198, "sampling/error_ema_mean": 0.005402308888733387, "sampling/priority_kl": 0.030000124871730805, "sampling/priority_scale": 0.9071862280601636, "sampling/prob_entropy": 10.27895221710205, "sampling/prob_max": 3.899485018337146e-05, "sampling/prob_min": 1.4481612379313446e-05, "sampling/prompt_draws_max": 4.0, "sampling/prompt_draws_mean": 0.37919999957084655, "sampling/prompt_draws_total": 11376.0, "sampling/seen_fraction": 0.34318000078201294, "sampling/unseen_fraction": 0.6568199992179871, "signal/accuracy_reward/centered_abs_mean": 0.15255534052848815, "signal/accuracy_reward/group_std_mean": 0.20133095383644103, "signal/accuracy_reward/group_zero_std_frac": 0.4305555522441864, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07627767026424408, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07627767026424408, "signal/advantage_abs_mean": 0.11574586480855942, "signal/advantage_pre_scale_abs_mean": 0.11574586480855942, "signal/advantage_pre_scale_std": 0.2071862369775772, "signal/advantage_std": 0.2071862369775772, "signal/brier_reward/centered_abs_mean": 0.08608684539794922, "signal/brier_reward/group_std_mean": 0.11757852882146835, "signal/brier_reward/group_zero_std_frac": 0.06944444626569748, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04304342269897461, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.04304342269897461, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.016097005270421506, "signal/format_reward/group_std_mean": 0.0349950447678566, "signal/format_reward/group_zero_std_frac": 0.8416666746139526, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.008048502635210753, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.008048502635210753, "signal/mean_confidence_reward/centered_abs_mean": 0.06561442390084267, "signal/mean_confidence_reward/group_std_mean": 0.08899795264005661, "signal/mean_confidence_reward/group_zero_std_frac": 0.08055555745959282, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.561442205565981e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.561442205565981e-07, "step": 160 }, { "calibration/aurc": 0.22540844687267875, "calibration/batch_distribution_entropy": 0.50386111626777, "calibration/batch_entropy_100bins": 0.27361577246112806, "calibration/batch_entropy_10bins": 0.50386111626777, "calibration/batch_entropy_50bins": 0.3220960602751323, "calibration/batch_uniqueness": 0.2634407387572316, "calibration/confidence_entropy": 0.5741133615322693, "calibration/coverage@0%": 0.0005221932114882506, "calibration/coverage@1%": 0.0005221932114882506, "calibration/coverage@10%": 0.1364801984608321, "calibration/coverage@15%": 0.22525304441383467, "calibration/coverage@20%": 0.4553799642709908, "calibration/coverage@25%": 0.5910975643984225, "calibration/coverage@30%": 0.7239594937576296, "calibration/coverage@5%": 0.10708387300151449, "calibration/distribution_entropy_10": 0.50386111626777, "calibration/distribution_entropy_100": 0.27361577246112806, "calibration/ece": 0.10513270199830167, "calibration/mean_confidence": 0.6773881662806778, "calibration/unique_confidence_per_question": 0.018750000000000003, "calibration/unique_confidences": 7.2, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00789930555555558, "completions/max_length": 3310.8, "completions/max_terminated_length": 3310.8, "completions/mean_length": 687.6014038085938, "completions/mean_terminated_length": 693.0419189453125, "completions/min_length": 0.0, "completions/min_terminated_length": 190.0, "epoch": 0.39663461538461536, "grad_norm": 0.0007785888155922294, "learning_rate": 3.966346153846154e-06, "loss": -0.0083, "num_tokens": 334271806.0, "reward": 1.232865309715271, "reward_std": 0.1643844425678253, "rewards/accuracy_reward": 0.6660590291023254, "rewards/brier_reward": 0.8076440930366516, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9920138955116272, "rewards/mean_confidence_reward": 0.6794357776641846, "sampling/batch_mean_priority_error": 0.04254050978087628, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.7833333333333333, "sampling/error_ema_max": 0.12885397672653198, "sampling/error_ema_mean": 0.005447699595242739, "sampling/priority_kl": 0.029999880865216256, "sampling/priority_scale": 0.8958297192817554, "sampling/prob_entropy": 10.27895221710205, "sampling/prob_max": 3.91241439501755e-05, "sampling/prob_min": 1.470900224376237e-05, "sampling/prompt_draws_max": 4.0, "sampling/prompt_draws_mean": 0.39120000004768374, "sampling/prompt_draws_total": 11736.0, "sampling/seen_fraction": 0.3527533352375031, "sampling/unseen_fraction": 0.6472466647624969, "signal/accuracy_reward/centered_abs_mean": 0.166943359375, "signal/accuracy_reward/group_std_mean": 0.217330726981163, "signal/accuracy_reward/group_zero_std_frac": 0.3861111104488373, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0834716796875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0834716796875, "signal/advantage_abs_mean": 0.11999084949493408, "signal/advantage_pre_scale_abs_mean": 0.11999084949493408, "signal/advantage_pre_scale_std": 0.2072890281677246, "signal/advantage_std": 0.2072890281677246, "signal/brier_reward/centered_abs_mean": 0.08629768192768097, "signal/brier_reward/group_std_mean": 0.11852459907531739, "signal/brier_reward/group_zero_std_frac": 0.09444444701075554, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04314884096384049, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.04314884096384049, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.01467013880610466, "signal/format_reward/group_std_mean": 0.03149766884744167, "signal/format_reward/group_zero_std_frac": 0.8583333373069764, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00733506940305233, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00733506940305233, "signal/mean_confidence_reward/centered_abs_mean": 0.06508653238415718, "signal/mean_confidence_reward/group_std_mean": 0.08856085240840912, "signal/mean_confidence_reward/group_zero_std_frac": 0.11666666865348815, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.508653086712002e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.508653086712002e-07, "step": 165 }, { "calibration/aurc": 0.17985976022582273, "calibration/batch_distribution_entropy": 0.413745285715431, "calibration/batch_entropy_100bins": 0.22452526520252097, "calibration/batch_entropy_10bins": 0.413745285715431, "calibration/batch_entropy_50bins": 0.26430750940805287, "calibration/batch_uniqueness": 0.11678018501184825, "calibration/confidence_entropy": 0.573921612271716, "calibration/coverage@0%": 0.0010526315789473684, "calibration/coverage@1%": 0.0010526315789473684, "calibration/coverage@10%": 0.0010526315789473684, "calibration/coverage@15%": 0.4791675433804964, "calibration/coverage@20%": 0.7099612139451155, "calibration/coverage@25%": 0.7926837270341207, "calibration/coverage@30%": 0.9301837270341207, "calibration/coverage@5%": 0.0010526315789473684, "calibration/distribution_entropy_10": 0.413745285715431, "calibration/distribution_entropy_100": 0.22452526520252097, "calibration/ece": 0.06974335424247227, "calibration/mean_confidence": 0.688039419033681, "calibration/unique_confidence_per_question": 0.014583333333333334, "calibration/unique_confidences": 5.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011458333333333326, "completions/max_length": 3253.0, "completions/max_terminated_length": 3253.0, "completions/mean_length": 712.0092041015625, "completions/mean_terminated_length": 720.3050659179687, "completions/min_length": 0.0, "completions/min_terminated_length": 187.4, "epoch": 0.40865384615384615, "grad_norm": 0.0007355434936471283, "learning_rate": 4.086538461538462e-06, "loss": -0.0097, "num_tokens": 345598280.0, "reward": 1.2475274801254272, "reward_std": 0.1580900192260742, "rewards/accuracy_reward": 0.6924479246139527, "rewards/brier_reward": 0.8140516638755798, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9885416626930237, "rewards/mean_confidence_reward": 0.6819965600967407, "sampling/batch_mean_priority_error": 0.04085833234276857, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.7277777777777777, "sampling/error_ema_max": 0.12885397672653198, "sampling/error_ema_mean": 0.005493382271379232, "sampling/priority_kl": 0.02999972701072693, "sampling/priority_scale": 0.8849816142814234, "sampling/prob_entropy": 10.278952407836915, "sampling/prob_max": 3.9243818901013586e-05, "sampling/prob_min": 1.4929330063750967e-05, "sampling/prompt_draws_max": 4.0, "sampling/prompt_draws_mean": 0.4032000005245209, "sampling/prompt_draws_total": 12096.0, "sampling/seen_fraction": 0.3616133391857147, "sampling/unseen_fraction": 0.6383866608142853, "signal/accuracy_reward/centered_abs_mean": 0.15483398288488387, "signal/accuracy_reward/group_std_mean": 0.2024546504020691, "signal/accuracy_reward/group_zero_std_frac": 0.4361111164093018, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07741699144244193, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07741699144244193, "signal/advantage_abs_mean": 0.1167249247431755, "signal/advantage_pre_scale_abs_mean": 0.1167249247431755, "signal/advantage_pre_scale_std": 0.2062102288007736, "signal/advantage_std": 0.2062102288007736, "signal/brier_reward/centered_abs_mean": 0.08543108999729157, "signal/brier_reward/group_std_mean": 0.11673992872238159, "signal/brier_reward/group_zero_std_frac": 0.1250000014901161, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.042715544998645785, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.042715544998645785, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.018695746455341576, "signal/format_reward/group_std_mean": 0.033667823672294615, "signal/format_reward/group_zero_std_frac": 0.8666666746139526, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.009347873227670788, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.009347873227670788, "signal/mean_confidence_reward/centered_abs_mean": 0.06826469749212265, "signal/mean_confidence_reward/group_std_mean": 0.09210723340511322, "signal/mean_confidence_reward/group_zero_std_frac": 0.14166666716337203, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.826469984844152e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.826469984844152e-07, "step": 170 }, { "calibration/aurc": 0.21927099575947104, "calibration/batch_distribution_entropy": 0.3604094691063196, "calibration/batch_entropy_100bins": 0.1965044571986865, "calibration/batch_entropy_10bins": 0.3604094691063196, "calibration/batch_entropy_50bins": 0.23132186759881396, "calibration/batch_uniqueness": -0.05702418518762982, "calibration/confidence_entropy": 0.5533325414996917, "calibration/coverage@0%": 0.0010416666666666667, "calibration/coverage@1%": 0.0010416666666666667, "calibration/coverage@10%": 0.3256489965095986, "calibration/coverage@15%": 0.340832242582897, "calibration/coverage@20%": 0.488612921434072, "calibration/coverage@25%": 0.49070169428002497, "calibration/coverage@30%": 0.6922723749082973, "calibration/coverage@5%": 0.0010416666666666667, "calibration/distribution_entropy_10": 0.3604094691063196, "calibration/distribution_entropy_100": 0.1965044571986865, "calibration/ece": 0.1368860904827325, "calibration/mean_confidence": 0.7124982051032311, "calibration/unique_confidence_per_question": 0.016145833333333335, "calibration/unique_confidences": 6.2, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0052951388888889065, "completions/max_length": 2967.6, "completions/max_terminated_length": 2967.6, "completions/mean_length": 720.8404663085937, "completions/mean_terminated_length": 724.6657348632813, "completions/min_length": 0.0, "completions/min_terminated_length": 195.2, "epoch": 0.4206730769230769, "grad_norm": 0.0006969543755985796, "learning_rate": 4.20673076923077e-06, "loss": -0.0049, "num_tokens": 356992282.0, "reward": 1.2345721244812011, "reward_std": 0.1517510175704956, "rewards/accuracy_reward": 0.6669270753860473, "rewards/brier_reward": 0.8076720833778381, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.99453125, "rewards/mean_confidence_reward": 0.689978277683258, "sampling/batch_mean_priority_error": 0.04690258188712983, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.7444444444444444, "sampling/error_ema_max": 0.12885397672653198, "sampling/error_ema_mean": 0.00554170049726963, "sampling/priority_kl": 0.029999922215938567, "sampling/priority_scale": 0.8748339593177661, "sampling/prob_entropy": 10.278952980041504, "sampling/prob_max": 3.9364156691590324e-05, "sampling/prob_min": 1.5141977200983092e-05, "sampling/prompt_draws_max": 4.0, "sampling/prompt_draws_mean": 0.415200001001358, "sampling/prompt_draws_total": 12456.0, "sampling/seen_fraction": 0.3704466700553894, "sampling/unseen_fraction": 0.6295533299446106, "signal/accuracy_reward/centered_abs_mean": 0.15778537094593048, "signal/accuracy_reward/group_std_mean": 0.20124109387397765, "signal/accuracy_reward/group_zero_std_frac": 0.45277778506278993, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07889268547296524, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07889268547296524, "signal/advantage_abs_mean": 0.1146531030535698, "signal/advantage_pre_scale_abs_mean": 0.1146531030535698, "signal/advantage_pre_scale_std": 0.19675582647323608, "signal/advantage_std": 0.19675582647323608, "signal/brier_reward/centered_abs_mean": 0.08641349822282791, "signal/brier_reward/group_std_mean": 0.1162311390042305, "signal/brier_reward/group_zero_std_frac": 0.16111111342906953, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04320674911141396, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.04320674911141396, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.009901258815079927, "signal/format_reward/group_std_mean": 0.022229401022195817, "signal/format_reward/group_zero_std_frac": 0.8944444417953491, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.004950629407539963, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.004950629407539963, "signal/mean_confidence_reward/centered_abs_mean": 0.06902343928813934, "signal/mean_confidence_reward/group_std_mean": 0.09159857630729676, "signal/mean_confidence_reward/group_zero_std_frac": 0.18888889253139496, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.902343670844857e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.902343670844857e-07, "step": 175 }, { "calibration/aurc": 0.167481550360431, "calibration/batch_distribution_entropy": 0.3882773633167407, "calibration/batch_entropy_100bins": 0.21081035817577246, "calibration/batch_entropy_10bins": 0.3882773633167407, "calibration/batch_entropy_50bins": 0.24816254276150107, "calibration/batch_uniqueness": 0.025158058255912157, "calibration/confidence_entropy": 0.5634601281146097, "calibration/coverage@0%": 0.002096989185252367, "calibration/coverage@1%": 0.002096989185252367, "calibration/coverage@10%": 0.22303939756221572, "calibration/coverage@15%": 0.6428123840540875, "calibration/coverage@20%": 0.6590496214151241, "calibration/coverage@25%": 0.6957950282392711, "calibration/coverage@30%": 0.8132053519488075, "calibration/coverage@5%": 0.12146871693394345, "calibration/distribution_entropy_10": 0.3882773633167407, "calibration/distribution_entropy_100": 0.21081035817577246, "calibration/ece": 0.11586185334078787, "calibration/mean_confidence": 0.6786814728079384, "calibration/unique_confidence_per_question": 0.01875, "calibration/unique_confidences": 7.2, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006510416666666652, "completions/max_length": 2608.0, "completions/max_terminated_length": 2608.0, "completions/mean_length": 692.8581665039062, "completions/mean_terminated_length": 697.3784790039062, "completions/min_length": 0.0, "completions/min_terminated_length": 228.6, "epoch": 0.4326923076923077, "grad_norm": 0.0006377384415827692, "learning_rate": 4.326923076923077e-06, "loss": -0.0057, "num_tokens": 368071608.0, "reward": 1.2389385461807252, "reward_std": 0.14253281354904174, "rewards/accuracy_reward": 0.6736111164093017, "rewards/brier_reward": 0.8110234379768372, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9932291746139527, "rewards/mean_confidence_reward": 0.6653298616409302, "sampling/batch_mean_priority_error": 0.046377329209952564, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.7166666666666668, "sampling/error_ema_max": 0.12885397672653198, "sampling/error_ema_mean": 0.005593094788491726, "sampling/priority_kl": 0.029999877139925957, "sampling/priority_scale": 0.8652638375526294, "sampling/prob_entropy": 10.27895221710205, "sampling/prob_max": 3.948584635509178e-05, "sampling/prob_min": 1.5348353190347553e-05, "sampling/prompt_draws_max": 4.0, "sampling/prompt_draws_mean": 0.4272000014781952, "sampling/prompt_draws_total": 12816.0, "sampling/seen_fraction": 0.37928666472434996, "sampling/unseen_fraction": 0.62071333527565, "signal/accuracy_reward/centered_abs_mean": 0.14998914897441865, "signal/accuracy_reward/group_std_mean": 0.19401742815971373, "signal/accuracy_reward/group_zero_std_frac": 0.45833333730697634, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07499457448720932, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07499457448720932, "signal/advantage_abs_mean": 0.10474234223365783, "signal/advantage_pre_scale_abs_mean": 0.10474234223365783, "signal/advantage_pre_scale_std": 0.1851011574268341, "signal/advantage_std": 0.1851011574268341, "signal/brier_reward/centered_abs_mean": 0.08834660351276398, "signal/brier_reward/group_std_mean": 0.11847157329320908, "signal/brier_reward/group_zero_std_frac": 0.14166666865348815, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04417330175638199, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.04417330175638199, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.0115234375, "signal/format_reward/group_std_mean": 0.025313834473490716, "signal/format_reward/group_zero_std_frac": 0.8805555582046509, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00576171875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00576171875, "signal/mean_confidence_reward/centered_abs_mean": 0.08030165135860443, "signal/mean_confidence_reward/group_std_mean": 0.10517692267894745, "signal/mean_confidence_reward/group_zero_std_frac": 0.1638888895511627, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 8.030164622141456e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 8.030164622141456e-07, "step": 180 }, { "calibration/aurc": 0.19044998736844715, "calibration/batch_distribution_entropy": 0.4409406822397132, "calibration/batch_entropy_100bins": 0.24965460938374448, "calibration/batch_entropy_10bins": 0.4409406822397132, "calibration/batch_entropy_50bins": 0.2938893667888068, "calibration/batch_uniqueness": 0.14487271940651691, "calibration/confidence_entropy": 0.558334050340173, "calibration/coverage@0%": 0.006266318537859009, "calibration/coverage@1%": 0.006266318537859009, "calibration/coverage@10%": 0.006266318537859009, "calibration/coverage@15%": 0.5150388925152307, "calibration/coverage@20%": 0.7192069190600522, "calibration/coverage@25%": 0.7990208877284595, "calibration/coverage@30%": 0.9112271540469974, "calibration/coverage@5%": 0.006266318537859009, "calibration/distribution_entropy_10": 0.4409406822397132, "calibration/distribution_entropy_100": 0.24965460938374448, "calibration/ece": 0.07587018603133158, "calibration/mean_confidence": 0.6548105689730199, "calibration/unique_confidence_per_question": 0.019791666666666666, "calibration/unique_confidences": 7.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.004166666666666652, "completions/max_length": 2245.6, "completions/max_terminated_length": 2245.6, "completions/mean_length": 736.2003540039062, "completions/mean_terminated_length": 739.3214111328125, "completions/min_length": 0.0, "completions/min_terminated_length": 231.4, "epoch": 0.44471153846153844, "grad_norm": 0.0005959332338534296, "learning_rate": 4.447115384615385e-06, "loss": -0.0035, "num_tokens": 379675452.0, "reward": 1.248991894721985, "reward_std": 0.13299419730901718, "rewards/accuracy_reward": 0.6863715410232544, "rewards/brier_reward": 0.8158528804779053, "rewards/confidence_one_or_zero": 8.680555620230735e-05, "rewards/format_reward": 0.9957465291023254, "rewards/mean_confidence_reward": 0.6423176884651184, "sampling/batch_mean_priority_error": 0.044696025078938384, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.7138888888888889, "sampling/error_ema_max": 0.12885397672653198, "sampling/error_ema_mean": 0.005641408730298281, "sampling/priority_kl": 0.030000098049640656, "sampling/priority_scale": 0.856269246363081, "sampling/prob_entropy": 10.278952598571777, "sampling/prob_max": 3.960601825383492e-05, "sampling/prob_min": 1.554702212160919e-05, "sampling/prompt_draws_max": 4.0, "sampling/prompt_draws_mean": 0.43920000195503234, "sampling/prompt_draws_total": 13176.0, "sampling/seen_fraction": 0.3879533350467682, "sampling/unseen_fraction": 0.6120466649532318, "signal/accuracy_reward/centered_abs_mean": 0.13031141310930253, "signal/accuracy_reward/group_std_mean": 0.1774735778570175, "signal/accuracy_reward/group_zero_std_frac": 0.4777777910232544, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06515570655465126, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06515570655465126, "signal/advantage_abs_mean": 0.09407005459070206, "signal/advantage_pre_scale_abs_mean": 0.09407005459070206, "signal/advantage_pre_scale_std": 0.169356307387352, "signal/advantage_std": 0.169356307387352, "signal/brier_reward/centered_abs_mean": 0.08961217999458312, "signal/brier_reward/group_std_mean": 0.12095813304185868, "signal/brier_reward/group_zero_std_frac": 0.0722222238779068, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04480608999729156, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.04480608999729156, "signal/confidence_one_or_zero/centered_abs_mean": 0.0001681857625953853, "signal/confidence_one_or_zero/group_std_mean": 0.0004910463467240334, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9972222208976745, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.6818574977151001e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.6818574977151001e-09, "signal/format_reward/centered_abs_mean": 0.00793728269636631, "signal/format_reward/group_std_mean": 0.01877547800540924, "signal/format_reward/group_zero_std_frac": 0.9083333373069763, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.003968641348183155, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.003968641348183155, "signal/mean_confidence_reward/centered_abs_mean": 0.08483316749334335, "signal/mean_confidence_reward/group_std_mean": 0.111886428296566, "signal/mean_confidence_reward/group_zero_std_frac": 0.0833333358168602, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 8.483316150886821e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 8.483316150886821e-07, "step": 185 }, { "calibration/aurc": 0.16341015205736542, "calibration/batch_distribution_entropy": 0.4751064047048981, "calibration/batch_entropy_100bins": 0.28294152401087824, "calibration/batch_entropy_10bins": 0.4751064047048981, "calibration/batch_entropy_50bins": 0.3330741841101023, "calibration/batch_uniqueness": 0.23920660908986213, "calibration/confidence_entropy": 0.5537682233129289, "calibration/coverage@0%": 0.0047257908550904825, "calibration/coverage@1%": 0.0047257908550904825, "calibration/coverage@10%": 0.12252683797550934, "calibration/coverage@15%": 0.5624311863974538, "calibration/coverage@20%": 0.6029575021869276, "calibration/coverage@25%": 0.9241356941957267, "calibration/coverage@30%": 0.9821989528795811, "calibration/coverage@5%": 0.12252683797550934, "calibration/distribution_entropy_10": 0.4751064047048981, "calibration/distribution_entropy_100": 0.28294152401087824, "calibration/ece": 0.13411462163037816, "calibration/mean_confidence": 0.636389035043661, "calibration/unique_confidence_per_question": 0.023437499999999997, "calibration/unique_confidences": 9.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.005381944444444442, "completions/max_length": 3022.0, "completions/max_terminated_length": 3022.0, "completions/mean_length": 770.71875, "completions/mean_terminated_length": 774.9037719726563, "completions/min_length": 0.0, "completions/min_terminated_length": 265.8, "epoch": 0.4567307692307692, "grad_norm": 0.0006286805146373808, "learning_rate": 4.567307692307692e-06, "loss": -0.0059, "num_tokens": 391665524.0, "reward": 1.246759819984436, "reward_std": 0.1368389680981636, "rewards/accuracy_reward": 0.6818576335906983, "rewards/brier_reward": 0.8174648761749268, "rewards/confidence_one_or_zero": 0.0001736111124046147, "rewards/format_reward": 0.9941840291023254, "rewards/mean_confidence_reward": 0.6571961641311646, "sampling/batch_mean_priority_error": 0.047586199898986284, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.6944444444444444, "sampling/error_ema_max": 0.12885397672653198, "sampling/error_ema_mean": 0.005692024063318968, "sampling/priority_kl": 0.03000013455748558, "sampling/priority_scale": 0.8477106154197827, "sampling/prob_entropy": 10.278950881958007, "sampling/prob_max": 3.972284248447977e-05, "sampling/prob_min": 1.5739300579298288e-05, "sampling/prompt_draws_max": 4.0, "sampling/prompt_draws_mean": 0.451199996471405, "sampling/prompt_draws_total": 13536.0, "sampling/seen_fraction": 0.39632666707038877, "sampling/unseen_fraction": 0.6036733329296112, "signal/accuracy_reward/centered_abs_mean": 0.14089084267616273, "signal/accuracy_reward/group_std_mean": 0.1887511819601059, "signal/accuracy_reward/group_zero_std_frac": 0.4499999940395355, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07044542133808136, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07044542133808136, "signal/advantage_abs_mean": 0.0984200268983841, "signal/advantage_pre_scale_abs_mean": 0.0984200268983841, "signal/advantage_pre_scale_std": 0.17806504666805267, "signal/advantage_std": 0.17806504666805267, "signal/brier_reward/centered_abs_mean": 0.09338046461343766, "signal/brier_reward/group_std_mean": 0.12290722280740737, "signal/brier_reward/group_zero_std_frac": 0.15000000447034836, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04669023230671883, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.04669023230671883, "signal/confidence_one_or_zero/centered_abs_mean": 0.0003363715251907706, "signal/confidence_one_or_zero/group_std_mean": 0.0009820926934480667, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9944444417953491, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 3.3637149954302003e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 3.3637149954302003e-09, "signal/format_reward/centered_abs_mean": 0.010660807369276881, "signal/format_reward/group_std_mean": 0.024061377346515655, "signal/format_reward/group_zero_std_frac": 0.8861111164093017, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.005330403684638441, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.005330403684638441, "signal/mean_confidence_reward/centered_abs_mean": 0.07968723326921463, "signal/mean_confidence_reward/group_std_mean": 0.10576378107070923, "signal/mean_confidence_reward/group_zero_std_frac": 0.17777778208255768, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 7.968723252815834e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 7.968723252815834e-07, "step": 190 }, { "calibration/aurc": 0.18329079440989587, "calibration/batch_distribution_entropy": 0.41771489644191373, "calibration/batch_entropy_100bins": 0.24362862638440336, "calibration/batch_entropy_10bins": 0.41771489644191373, "calibration/batch_entropy_50bins": 0.2867956771015705, "calibration/batch_uniqueness": 0.044625812665381105, "calibration/confidence_entropy": 0.5434150954937691, "calibration/coverage@0%": 0.012588820332088645, "calibration/coverage@1%": 0.012588820332088645, "calibration/coverage@10%": 0.1366529914550833, "calibration/coverage@15%": 0.3187713645579442, "calibration/coverage@20%": 0.3588475991818673, "calibration/coverage@25%": 0.8739051324242475, "calibration/coverage@30%": 0.9413612565445026, "calibration/coverage@5%": 0.10563694867433464, "calibration/distribution_entropy_10": 0.41771489644191373, "calibration/distribution_entropy_100": 0.24362862638440336, "calibration/ece": 0.09409223753500193, "calibration/mean_confidence": 0.6793051824351858, "calibration/unique_confidence_per_question": 0.0203125, "calibration/unique_confidences": 7.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0050347222222222095, "completions/max_length": 3301.4, "completions/max_terminated_length": 3301.4, "completions/mean_length": 730.9253540039062, "completions/mean_terminated_length": 734.6098022460938, "completions/min_length": 0.0, "completions/min_terminated_length": 236.8, "epoch": 0.46875, "grad_norm": 0.0006991397240199149, "learning_rate": 4.6875000000000004e-06, "loss": -0.0045, "num_tokens": 403195064.0, "reward": 1.2842619657516479, "reward_std": 0.13217405527830123, "rewards/accuracy_reward": 0.7235243201255799, "rewards/brier_reward": 0.8509754776954651, "rewards/confidence_one_or_zero": 0.0004340277810115367, "rewards/format_reward": 0.9940104246139526, "rewards/mean_confidence_reward": 0.6825746297836304, "sampling/batch_mean_priority_error": 0.036192118732255085, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.711111111111111, "sampling/error_ema_max": 0.12885397672653198, "sampling/error_ema_mean": 0.005736053269356489, "sampling/priority_kl": 0.029999953135848044, "sampling/priority_scale": 0.8396184146171436, "sampling/prob_entropy": 10.278952407836915, "sampling/prob_max": 3.984217764809728e-05, "sampling/prob_min": 1.5926705600577407e-05, "sampling/prompt_draws_max": 4.0, "sampling/prompt_draws_mean": 0.46320000290870667, "sampling/prompt_draws_total": 13896.0, "sampling/seen_fraction": 0.40477333664894105, "sampling/unseen_fraction": 0.595226663351059, "signal/accuracy_reward/centered_abs_mean": 0.13550889641046523, "signal/accuracy_reward/group_std_mean": 0.17976902425289154, "signal/accuracy_reward/group_zero_std_frac": 0.4888888955116272, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06775444820523262, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06775444820523262, "signal/advantage_abs_mean": 0.0955138310790062, "signal/advantage_pre_scale_abs_mean": 0.0955138310790062, "signal/advantage_pre_scale_std": 0.18250684440135956, "signal/advantage_std": 0.18250684440135956, "signal/brier_reward/centered_abs_mean": 0.08265027552843093, "signal/brier_reward/group_std_mean": 0.11094668358564377, "signal/brier_reward/group_zero_std_frac": 0.16944444477558135, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04132513776421547, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.04132513776421547, "signal/confidence_one_or_zero/centered_abs_mean": 0.0008083767141215503, "signal/confidence_one_or_zero/group_std_mean": 0.0018047165125608445, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9916666626930237, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 8.083766545041726e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 8.083766545041726e-09, "signal/format_reward/centered_abs_mean": 0.010519748367369176, "signal/format_reward/group_std_mean": 0.020848751068115234, "signal/format_reward/group_zero_std_frac": 0.9111111283302307, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.005259874183684588, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.005259874183684588, "signal/mean_confidence_reward/centered_abs_mean": 0.0648464672267437, "signal/mean_confidence_reward/group_std_mean": 0.08931698352098465, "signal/mean_confidence_reward/group_zero_std_frac": 0.1972222238779068, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.48464674668503e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.48464674668503e-07, "step": 195 }, { "calibration/aurc": 0.2065555207393436, "calibration/batch_distribution_entropy": 0.5505419111167639, "calibration/batch_entropy_100bins": 0.35560265388757684, "calibration/batch_entropy_10bins": 0.5505419111167639, "calibration/batch_entropy_50bins": 0.41860969055372027, "calibration/batch_uniqueness": 0.5245940876165731, "calibration/confidence_entropy": 0.5985963082906771, "calibration/coverage@0%": 0.0005208333333333333, "calibration/coverage@1%": 0.0005208333333333333, "calibration/coverage@10%": 0.3602311242766602, "calibration/coverage@15%": 0.5076925461559658, "calibration/coverage@20%": 0.5092550461559658, "calibration/coverage@25%": 0.5629008794892991, "calibration/coverage@30%": 0.5629008794892991, "calibration/coverage@5%": 0.2745240596583081, "calibration/distribution_entropy_10": 0.5505419111167639, "calibration/distribution_entropy_100": 0.35560265388757684, "calibration/ece": 0.14544184003857816, "calibration/mean_confidence": 0.5095282963626342, "calibration/unique_confidence_per_question": 0.022395833333333334, "calibration/unique_confidences": 8.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.005381944444444442, "completions/max_length": 3359.2, "completions/max_terminated_length": 3359.2, "completions/mean_length": 743.1461059570313, "completions/mean_terminated_length": 747.1810913085938, "completions/min_length": 0.0, "completions/min_terminated_length": 216.6, "epoch": 0.4807692307692308, "grad_norm": 0.0005398523062467575, "learning_rate": 4.807692307692308e-06, "loss": -0.0049, "num_tokens": 414880587.0, "reward": 1.2484180927276611, "reward_std": 0.11528781056404114, "rewards/accuracy_reward": 0.6973090410232544, "rewards/brier_reward": 0.8050713539123535, "rewards/confidence_one_or_zero": 0.0006944444612599909, "rewards/format_reward": 0.9944444417953491, "rewards/mean_confidence_reward": 0.5597823977470398, "sampling/batch_mean_priority_error": 0.040991808575262516, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.675, "sampling/error_ema_max": 0.12885397672653198, "sampling/error_ema_mean": 0.005779322423040867, "sampling/priority_kl": 0.029999599605798722, "sampling/priority_scale": 0.8319383798865602, "sampling/prob_entropy": 10.278953552246094, "sampling/prob_max": 3.995946171926334e-05, "sampling/prob_min": 1.610812141734641e-05, "sampling/prompt_draws_max": 4.0, "sampling/prompt_draws_mean": 0.47519999742507935, "sampling/prompt_draws_total": 14256.0, "sampling/seen_fraction": 0.41300666332244873, "sampling/unseen_fraction": 0.5869933366775513, "signal/accuracy_reward/centered_abs_mean": 0.13489040732383728, "signal/accuracy_reward/group_std_mean": 0.18231622874736786, "signal/accuracy_reward/group_zero_std_frac": 0.45833333730697634, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06744520366191864, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06744520366191864, "signal/advantage_abs_mean": 0.08110309839248657, "signal/advantage_pre_scale_abs_mean": 0.08110309839248657, "signal/advantage_pre_scale_std": 0.1474313974380493, "signal/advantage_std": 0.1474313974380493, "signal/brier_reward/centered_abs_mean": 0.08825185149908066, "signal/brier_reward/group_std_mean": 0.11570080667734146, "signal/brier_reward/group_zero_std_frac": 0.055555556528270246, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04412592574954033, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.04412592574954033, "signal/confidence_one_or_zero/centered_abs_mean": 0.0012695312267169355, "signal/confidence_one_or_zero/group_std_mean": 0.00259860772639513, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9888888835906983, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.2695311824018063e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.2695311824018063e-08, "signal/format_reward/centered_abs_mean": 0.010199652798473836, "signal/format_reward/group_std_mean": 0.021548817306756972, "signal/format_reward/group_zero_std_frac": 0.9055555701255799, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.005099826399236918, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.005099826399236918, "signal/mean_confidence_reward/centered_abs_mean": 0.07937933951616287, "signal/mean_confidence_reward/group_std_mean": 0.10239440947771072, "signal/mean_confidence_reward/group_zero_std_frac": 0.058333334140479566, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 7.937933673929365e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 7.937933673929365e-07, "step": 200 }, { "epoch": 0.4807692307692308, "eval_calibration/aurc": 0.16129420631495728, "eval_calibration/batch_distribution_entropy": 0.5326029169461522, "eval_calibration/batch_entropy_100bins": 0.316344106044224, "eval_calibration/batch_entropy_10bins": 0.5326029169461522, "eval_calibration/batch_entropy_50bins": 0.3723951632305077, "eval_calibration/batch_uniqueness": 0.37980282603306575, "eval_calibration/confidence_entropy": 0.5775948003888051, "eval_calibration/coverage@0%": 0.0, "eval_calibration/coverage@1%": 0.0, "eval_calibration/coverage@10%": 0.4847161572052402, "eval_calibration/coverage@15%": 0.6349344978165938, "eval_calibration/coverage@20%": 0.6349344978165938, "eval_calibration/coverage@25%": 0.8724890829694323, "eval_calibration/coverage@30%": 0.9973799126637555, "eval_calibration/coverage@5%": 0.0, "eval_calibration/distribution_entropy_10": 0.5326029169461522, "eval_calibration/distribution_entropy_100": 0.316344106044224, "eval_calibration/ece": 0.05973799126637561, "eval_calibration/mean_confidence": 0.6391266375545851, "eval_calibration/unique_confidence_per_question": 0.008680555555555556, "eval_calibration/unique_confidences": 10, "eval_completions/clipped_ratio": 0.0026041666666666665, "eval_completions/max_length": 2023.0, "eval_completions/max_terminated_length": 2023.0, "eval_completions/mean_length": 741.9574788411459, "eval_completions/mean_terminated_length": 743.9277547200521, "eval_completions/min_length": 184.5, "eval_completions/min_terminated_length": 290.5, "eval_loss": 0.0, "eval_num_tokens": 414880587.0, "eval_reward": 1.2596179842948914, "eval_reward_std": 0.2900431454181671, "eval_rewards/accuracy_reward": 0.6944444477558136, "eval_rewards/brier_reward": 0.8308550417423248, "eval_rewards/confidence_one_or_zero": 0.0017361111628512542, "eval_rewards/format_reward": 0.9939236144224802, "eval_rewards/mean_confidence_reward": 0.6352430582046509, "eval_runtime": 172.7696, "eval_samples_per_second": 5.788, "eval_signal/accuracy_reward/centered_abs_mean": 0.4055989633003871, "eval_signal/accuracy_reward/group_std_mean": 0.45562995473543805, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.20279948165019354, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.20279948165019354, "eval_signal/advantage_abs_mean": 0.24742527306079865, "eval_signal/advantage_pre_scale_abs_mean": 0.24742527306079865, "eval_signal/advantage_pre_scale_std": 0.2890806297461192, "eval_signal/advantage_std": 0.2890806297461192, "eval_signal/brier_reward/centered_abs_mean": 0.13593003898859024, "eval_signal/brier_reward/group_std_mean": 0.1747422789533933, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.06796501949429512, "eval_signal/brier_reward/weight": 0.5, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.06796501949429512, "eval_signal/confidence_one_or_zero/centered_abs_mean": 0.0033637151742974916, "eval_signal/confidence_one_or_zero/group_std_mean": 0.009820927555362383, "eval_signal/confidence_one_or_zero/group_zero_std_frac": 0.944444457689921, "eval_signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 3.3637149954302004e-08, "eval_signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "eval_signal/confidence_one_or_zero/weighted_centered_abs_mean": 3.3637149954302004e-08, "eval_signal/format_reward/centered_abs_mean": 0.011447482276707888, "eval_signal/format_reward/group_std_mean": 0.02786809237052997, "eval_signal/format_reward/group_zero_std_frac": 0.8611111243565878, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.005723741138353944, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.005723741138353944, "eval_signal/mean_confidence_reward/centered_abs_mean": 0.16506347556908926, "eval_signal/mean_confidence_reward/group_std_mean": 0.19205797463655472, "eval_signal/mean_confidence_reward/group_zero_std_frac": 0.0, "eval_signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 1.6506346772378795e-06, "eval_signal/mean_confidence_reward/weight": 9.999999747378752e-06, "eval_signal/mean_confidence_reward/weighted_centered_abs_mean": 1.6506346772378795e-06, "eval_steps_per_second": 0.035, "step": 200 }, { "epoch": 0.4807692307692308, "step": 200, "train_probe_calibration/aurc": 0.14485941592010415, "train_probe_calibration/batch_distribution_entropy": 0.5516900974776684, "train_probe_calibration/batch_entropy_100bins": 0.329665641102721, "train_probe_calibration/batch_entropy_10bins": 0.5516900974776684, "train_probe_calibration/batch_entropy_50bins": 0.3880770587607389, "train_probe_calibration/batch_uniqueness": 0.41108156757398834, "train_probe_calibration/confidence_entropy": 0.5772922826691077, "train_probe_calibration/coverage@0%": 0.002617801047120419, "train_probe_calibration/coverage@1%": 0.002617801047120419, "train_probe_calibration/coverage@10%": 0.4755671902268761, "train_probe_calibration/coverage@15%": 0.6492146596858639, "train_probe_calibration/coverage@20%": 0.6492146596858639, "train_probe_calibration/coverage@25%": 0.8673647469458988, "train_probe_calibration/coverage@30%": 1.0, "train_probe_calibration/coverage@5%": 0.002617801047120419, "train_probe_calibration/distribution_entropy_10": 0.5516900974776684, "train_probe_calibration/distribution_entropy_100": 0.329665641102721, "train_probe_calibration/ece": 0.08909249563699832, "train_probe_calibration/mean_confidence": 0.6388307155322861, "train_probe_calibration/unique_confidence_per_question": 0.008680555555555556, "train_probe_calibration/unique_confidences": 10, "train_probe_completions/clipped_ratio": 0.0034722222222222285, "train_probe_completions/max_length": 2349.0, "train_probe_completions/max_terminated_length": 2349.0, "train_probe_completions/mean_length": 758.0345764160156, "train_probe_completions/mean_terminated_length": 760.6690470377604, "train_probe_completions/min_length": 107.83333333333333, "train_probe_completions/min_terminated_length": 233.5, "train_probe_loss": 0.0, "train_probe_num_tokens": 414880587.0, "train_probe_reward": 1.2768206199010212, "train_probe_reward_std": 0.282529279589653, "train_probe_rewards/accuracy_reward": 0.7222222288449606, "train_probe_rewards/brier_reward": 0.836614578962326, "train_probe_rewards/confidence_one_or_zero": 0.0, "train_probe_rewards/format_reward": 0.9947916666666666, "train_probe_rewards/mean_confidence_reward": 0.6355034708976746, "train_probe_runtime": 185.1737, "train_probe_samples_per_second": 5.4, "train_probe_signal/accuracy_reward/centered_abs_mean": 0.3904079894224803, "train_probe_signal/accuracy_reward/group_std_mean": 0.44654062887032825, "train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0, "train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.19520399471124014, "train_probe_signal/accuracy_reward/weight": 0.5, "train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.19520399471124014, "train_probe_signal/advantage_abs_mean": 0.23706388225158057, "train_probe_signal/advantage_pre_scale_abs_mean": 0.23706388225158057, "train_probe_signal/advantage_pre_scale_std": 0.2810659855604172, "train_probe_signal/advantage_std": 0.2810659855604172, "train_probe_signal/brier_reward/centered_abs_mean": 0.13185519352555275, "train_probe_signal/brier_reward/group_std_mean": 0.17271685103575388, "train_probe_signal/brier_reward/group_zero_std_frac": 0.0, "train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.06592759676277637, "train_probe_signal/brier_reward/weight": 0.5, "train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.06592759676277637, "train_probe_signal/confidence_one_or_zero/centered_abs_mean": 0.0, "train_probe_signal/confidence_one_or_zero/group_std_mean": 0.0, "train_probe_signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "train_probe_signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "train_probe_signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "train_probe_signal/format_reward/centered_abs_mean": 0.009982638681928316, "train_probe_signal/format_reward/group_std_mean": 0.026473373795549076, "train_probe_signal/format_reward/group_zero_std_frac": 0.8611111342906952, "train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.004991319340964158, "train_probe_signal/format_reward/weight": 0.5, "train_probe_signal/format_reward/weighted_centered_abs_mean": 0.004991319340964158, "train_probe_signal/mean_confidence_reward/centered_abs_mean": 0.16340060532093048, "train_probe_signal/mean_confidence_reward/group_std_mean": 0.19176515191793442, "train_probe_signal/mean_confidence_reward/group_zero_std_frac": 0.0, "train_probe_signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 1.6340060445448519e-06, "train_probe_signal/mean_confidence_reward/weight": 9.999999747378752e-06, "train_probe_signal/mean_confidence_reward/weighted_centered_abs_mean": 1.6340060445448519e-06, "train_probe_steps_per_second": 0.032 }, { "calibration/aurc": 0.19018132797279527, "calibration/batch_distribution_entropy": 0.44957798572890495, "calibration/batch_entropy_100bins": 0.26184080956041245, "calibration/batch_entropy_10bins": 0.44957798572890495, "calibration/batch_entropy_50bins": 0.3082347644657134, "calibration/batch_uniqueness": 0.12632964276617803, "calibration/confidence_entropy": 0.5583439692211252, "calibration/coverage@0%": 0.0015984030434751163, "calibration/coverage@1%": 0.0015984030434751163, "calibration/coverage@10%": 0.2676351484502993, "calibration/coverage@15%": 0.2676351484502993, "calibration/coverage@20%": 0.2949317363768085, "calibration/coverage@25%": 0.8166204747775374, "calibration/coverage@30%": 0.9447403590741317, "calibration/coverage@5%": 0.2088970907075171, "calibration/distribution_entropy_10": 0.44957798572890495, "calibration/distribution_entropy_100": 0.26184080956041245, "calibration/ece": 0.09613425195341234, "calibration/mean_confidence": 0.6744759805451221, "calibration/unique_confidence_per_question": 0.019270833333333334, "calibration/unique_confidences": 7.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.007031249999999978, "completions/max_length": 2448.8, "completions/max_terminated_length": 2448.8, "completions/mean_length": 721.4182250976562, "completions/mean_terminated_length": 726.6839233398438, "completions/min_length": 38.6, "completions/min_terminated_length": 213.6, "epoch": 0.49278846153846156, "grad_norm": 0.000698321033269167, "learning_rate": 4.927884615384616e-06, "loss": -0.0069, "num_tokens": 426280477.0, "reward": 1.2819555044174193, "reward_std": 0.13396106362342836, "rewards/accuracy_reward": 0.7280381798744202, "rewards/brier_reward": 0.8430637955665589, "rewards/confidence_one_or_zero": 8.680555620230735e-05, "rewards/format_reward": 0.9927951335906983, "rewards/mean_confidence_reward": 0.6937847256660461, "sampling/batch_mean_priority_error": 0.03715462012595625, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.6888888888888889, "sampling/error_ema_max": 0.12885397672653198, "sampling/error_ema_mean": 0.005817731469869613, "sampling/priority_kl": 0.02999994046986103, "sampling/priority_scale": 0.8246608435874805, "sampling/prob_entropy": 10.278952980041504, "sampling/prob_max": 4.007791067124344e-05, "sampling/prob_min": 1.6206031432375312e-05, "sampling/prompt_draws_max": 4.0, "sampling/prompt_draws_mean": 0.487200003862381, "sampling/prompt_draws_total": 14616.0, "sampling/seen_fraction": 0.4212199985980988, "sampling/unseen_fraction": 0.5787800014019012, "signal/accuracy_reward/centered_abs_mean": 0.13053928017616273, "signal/accuracy_reward/group_std_mean": 0.17609834372997285, "signal/accuracy_reward/group_zero_std_frac": 0.4833333432674408, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06526964008808137, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06526964008808137, "signal/advantage_abs_mean": 0.09769376963377, "signal/advantage_pre_scale_abs_mean": 0.09769376963377, "signal/advantage_pre_scale_std": 0.18367029428482057, "signal/advantage_std": 0.18367029428482057, "signal/brier_reward/centered_abs_mean": 0.07794546037912368, "signal/brier_reward/group_std_mean": 0.10496081113815307, "signal/brier_reward/group_zero_std_frac": 0.23055556267499924, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03897273018956184, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03897273018956184, "signal/confidence_one_or_zero/centered_abs_mean": 0.0001681857625953853, "signal/confidence_one_or_zero/group_std_mean": 0.0004910463467240334, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9972222208976745, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.6818574977151001e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.6818574977151001e-09, "signal/format_reward/centered_abs_mean": 0.01070421002805233, "signal/format_reward/group_std_mean": 0.02027086652815342, "signal/format_reward/group_zero_std_frac": 0.9166666746139527, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.005352105014026165, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.005352105014026165, "signal/mean_confidence_reward/centered_abs_mean": 0.054693204909563066, "signal/mean_confidence_reward/group_std_mean": 0.07309356331825256, "signal/mean_confidence_reward/group_zero_std_frac": 0.2972222313284874, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.469320342399442e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.469320342399442e-07, "step": 205 }, { "calibration/aurc": 0.20358953124556783, "calibration/batch_distribution_entropy": 0.30459638634612196, "calibration/batch_entropy_100bins": 0.16357802323120227, "calibration/batch_entropy_10bins": 0.30459638634612196, "calibration/batch_entropy_50bins": 0.19256140227752971, "calibration/batch_uniqueness": -0.3185311723570564, "calibration/confidence_entropy": 0.5280267888744263, "calibration/coverage@0%": 0.008444148936170213, "calibration/coverage@1%": 0.008444148936170213, "calibration/coverage@10%": 0.17302748226950354, "calibration/coverage@15%": 0.19698581560283687, "calibration/coverage@20%": 0.7025530522446252, "calibration/coverage@25%": 0.754122340425532, "calibration/coverage@30%": 0.8, "calibration/coverage@5%": 0.008444148936170213, "calibration/distribution_entropy_10": 0.30459638634612196, "calibration/distribution_entropy_100": 0.16357802323120227, "calibration/ece": 0.08418190912281812, "calibration/mean_confidence": 0.7395930984643754, "calibration/unique_confidence_per_question": 0.01875, "calibration/unique_confidences": 7.2, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.004513888888888906, "completions/max_length": 3212.2, "completions/max_terminated_length": 3212.2, "completions/mean_length": 779.1319580078125, "completions/mean_terminated_length": 782.6882568359375, "completions/min_length": 0.0, "completions/min_terminated_length": 250.2, "epoch": 0.5048076923076923, "grad_norm": 0.0009137173765338957, "learning_rate": 4.987980769230769e-06, "loss": -0.0034, "num_tokens": 438366413.0, "reward": 1.2391991853713988, "reward_std": 0.1426423504948616, "rewards/accuracy_reward": 0.6729166626930236, "rewards/brier_reward": 0.8111093878746033, "rewards/confidence_one_or_zero": 0.0004340277868323028, "rewards/format_reward": 0.9943576455116272, "rewards/mean_confidence_reward": 0.732387113571167, "sampling/batch_mean_priority_error": 0.05414736051964284, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.6833333333333333, "sampling/error_ema_max": 0.12885397672653198, "sampling/error_ema_mean": 0.005871547013521194, "sampling/priority_kl": 0.030000075325369836, "sampling/priority_scale": 0.8178263127570972, "sampling/prob_entropy": 10.278951263427734, "sampling/prob_max": 4.019822226837277e-05, "sampling/prob_min": 1.6377108840970324e-05, "sampling/prompt_draws_max": 4.0, "sampling/prompt_draws_mean": 0.49920000433921813, "sampling/prompt_draws_total": 14976.0, "sampling/seen_fraction": 0.42946000695228576, "sampling/unseen_fraction": 0.5705399930477142, "signal/accuracy_reward/centered_abs_mean": 0.12706163227558137, "signal/accuracy_reward/group_std_mean": 0.17422983646392823, "signal/accuracy_reward/group_zero_std_frac": 0.48055556416511536, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06353081613779069, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06353081613779069, "signal/advantage_abs_mean": 0.10214050263166427, "signal/advantage_pre_scale_abs_mean": 0.10214050263166427, "signal/advantage_pre_scale_std": 0.19253622591495514, "signal/advantage_std": 0.19253622591495514, "signal/brier_reward/centered_abs_mean": 0.08288372755050659, "signal/brier_reward/group_std_mean": 0.11273103803396226, "signal/brier_reward/group_zero_std_frac": 0.29444445073604586, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.041441863775253295, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.041441863775253295, "signal/confidence_one_or_zero/centered_abs_mean": 0.0008192274020984769, "signal/confidence_one_or_zero/group_std_mean": 0.0018573501612991095, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9916666626930237, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 8.192274236762388e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 8.192274236762388e-09, "signal/format_reward/centered_abs_mean": 0.01031358502805233, "signal/format_reward/group_std_mean": 0.02181966695934534, "signal/format_reward/group_zero_std_frac": 0.9027777791023255, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.005156792514026165, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.005156792514026165, "signal/mean_confidence_reward/centered_abs_mean": 0.050381416082382204, "signal/mean_confidence_reward/group_std_mean": 0.06970751732587814, "signal/mean_confidence_reward/group_zero_std_frac": 0.39166667461395266, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.038141409841046e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.038141409841046e-07, "step": 210 }, { "calibration/aurc": 0.19383833760635832, "calibration/batch_distribution_entropy": 0.319087947530728, "calibration/batch_entropy_100bins": 0.17407298528855475, "calibration/batch_entropy_10bins": 0.319087947530728, "calibration/batch_entropy_50bins": 0.20491590180438157, "calibration/batch_uniqueness": -0.2964349564207656, "calibration/confidence_entropy": 0.5218764549310007, "calibration/coverage@0%": 0.012703504052971526, "calibration/coverage@1%": 0.012703504052971526, "calibration/coverage@10%": 0.21165638363412337, "calibration/coverage@15%": 0.3772813836341234, "calibration/coverage@20%": 0.5916749338624339, "calibration/coverage@25%": 0.7656332671957673, "calibration/coverage@30%": 0.8005291005291006, "calibration/coverage@5%": 0.18966685483831186, "calibration/distribution_entropy_10": 0.319087947530728, "calibration/distribution_entropy_100": 0.17407298528855475, "calibration/ece": 0.1256729673216957, "calibration/mean_confidence": 0.7253777652148183, "calibration/unique_confidence_per_question": 0.021354166666666667, "calibration/unique_confidences": 8.2, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.007204861111111094, "completions/max_length": 2856.6, "completions/max_terminated_length": 2856.6, "completions/mean_length": 772.7975830078125, "completions/mean_terminated_length": 778.4013061523438, "completions/min_length": 0.0, "completions/min_terminated_length": 241.8, "epoch": 0.5168269230769231, "grad_norm": 0.0008226765785366297, "learning_rate": 4.957932692307692e-06, "loss": -0.0055, "num_tokens": 450353393.0, "reward": 1.269078254699707, "reward_std": 0.1451116070151329, "rewards/accuracy_reward": 0.7244791626930237, "rewards/brier_reward": 0.8210409998893737, "rewards/confidence_one_or_zero": 0.0004340277810115367, "rewards/format_reward": 0.9926215291023255, "rewards/mean_confidence_reward": 0.7352039813995361, "sampling/batch_mean_priority_error": 0.052838198647012245, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.7, "sampling/error_ema_max": 0.12885397672653198, "sampling/error_ema_mean": 0.0059312603436410425, "sampling/priority_kl": 0.029999976605176927, "sampling/priority_scale": 0.8115076482063159, "sampling/prob_entropy": 10.278951835632324, "sampling/prob_max": 4.0320714469999074e-05, "sampling/prob_min": 1.6541368677280843e-05, "sampling/prompt_draws_max": 4.0, "sampling/prompt_draws_mean": 0.5111999988555909, "sampling/prompt_draws_total": 15336.0, "sampling/seen_fraction": 0.4377600014209747, "sampling/unseen_fraction": 0.5622399985790253, "signal/accuracy_reward/centered_abs_mean": 0.13215061128139496, "signal/accuracy_reward/group_std_mean": 0.180630099773407, "signal/accuracy_reward/group_zero_std_frac": 0.4611111104488373, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06607530564069748, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06607530564069748, "signal/advantage_abs_mean": 0.10373011976480484, "signal/advantage_pre_scale_abs_mean": 0.10373011976480484, "signal/advantage_pre_scale_std": 0.19957785606384276, "signal/advantage_std": 0.19957785606384276, "signal/brier_reward/centered_abs_mean": 0.08286137729883195, "signal/brier_reward/group_std_mean": 0.11241399496793747, "signal/brier_reward/group_zero_std_frac": 0.3277777791023254, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04143068864941597, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.04143068864941597, "signal/confidence_one_or_zero/centered_abs_mean": 0.0008300781133584678, "signal/confidence_one_or_zero/group_std_mean": 0.0021562909241765738, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9888888835906983, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 8.300780862668944e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 8.300780862668944e-09, "signal/format_reward/centered_abs_mean": 0.01175672747194767, "signal/format_reward/group_std_mean": 0.024286934360861778, "signal/format_reward/group_zero_std_frac": 0.8888888955116272, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.005878363735973835, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.005878363735973835, "signal/mean_confidence_reward/centered_abs_mean": 0.04709501713514328, "signal/mean_confidence_reward/group_std_mean": 0.06495100557804108, "signal/mean_confidence_reward/group_zero_std_frac": 0.45277778506278993, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 4.709501695288054e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 4.709501695288054e-07, "step": 215 }, { "calibration/aurc": 0.23251423646772898, "calibration/batch_distribution_entropy": 0.3889181188463192, "calibration/batch_entropy_100bins": 0.2171264230417509, "calibration/batch_entropy_10bins": 0.3889181188463192, "calibration/batch_entropy_50bins": 0.2555977121286575, "calibration/batch_uniqueness": -0.07294295671342735, "calibration/confidence_entropy": 0.539465534455553, "calibration/coverage@0%": 0.004742396889320927, "calibration/coverage@1%": 0.004742396889320927, "calibration/coverage@10%": 0.12825514476467503, "calibration/coverage@15%": 0.44024514615337107, "calibration/coverage@20%": 0.49824820128460284, "calibration/coverage@25%": 0.5766150672337371, "calibration/coverage@30%": 0.5766150672337371, "calibration/coverage@5%": 0.004742396889320927, "calibration/distribution_entropy_10": 0.3889181188463192, "calibration/distribution_entropy_100": 0.2171264230417509, "calibration/ece": 0.11262302634465074, "calibration/mean_confidence": 0.6882683367362527, "calibration/unique_confidence_per_question": 0.022916666666666665, "calibration/unique_confidences": 8.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00659722222222221, "completions/max_length": 3048.4, "completions/max_terminated_length": 3048.4, "completions/mean_length": 868.336474609375, "completions/mean_terminated_length": 874.126708984375, "completions/min_length": 0.0, "completions/min_terminated_length": 208.6, "epoch": 0.5288461538461539, "grad_norm": 0.0008023114060051739, "learning_rate": 4.927884615384616e-06, "loss": -0.0069, "num_tokens": 463435221.0, "reward": 1.2563817501068115, "reward_std": 0.14041633605957032, "rewards/accuracy_reward": 0.6907118082046508, "rewards/brier_reward": 0.8314127564430237, "rewards/confidence_one_or_zero": 0.0013020833546761423, "rewards/format_reward": 0.9906249880790711, "rewards/mean_confidence_reward": 0.6941753387451172, "sampling/batch_mean_priority_error": 0.04295309574473831, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.6777777777777778, "sampling/error_ema_max": 0.12885397672653198, "sampling/error_ema_mean": 0.005983918812125922, "sampling/priority_kl": 0.02999996654689312, "sampling/priority_scale": 0.8054787575965747, "sampling/prob_entropy": 10.278954124450683, "sampling/prob_max": 4.044408997287974e-05, "sampling/prob_min": 1.670217352511827e-05, "sampling/prompt_draws_max": 4.0, "sampling/prompt_draws_mean": 0.523199999332428, "sampling/prompt_draws_total": 15696.0, "sampling/seen_fraction": 0.44600666165351865, "sampling/unseen_fraction": 0.5539933383464813, "signal/accuracy_reward/centered_abs_mean": 0.1377658426761627, "signal/accuracy_reward/group_std_mean": 0.180549818277359, "signal/accuracy_reward/group_zero_std_frac": 0.4916666805744171, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06888292133808135, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06888292133808135, "signal/advantage_abs_mean": 0.10294992327690125, "signal/advantage_pre_scale_abs_mean": 0.10294992327690125, "signal/advantage_pre_scale_std": 0.19110060930252076, "signal/advantage_std": 0.19110060930252076, "signal/brier_reward/centered_abs_mean": 0.08256558924913407, "signal/brier_reward/group_std_mean": 0.11115524917840958, "signal/brier_reward/group_zero_std_frac": 0.30833334028720855, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.041282794624567035, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.041282794624567035, "signal/confidence_one_or_zero/centered_abs_mean": 0.0024034288129769267, "signal/confidence_one_or_zero/group_std_mean": 0.005086476355791092, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9777777791023254, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 2.4034287449126168e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 2.4034287449126168e-08, "signal/format_reward/centered_abs_mean": 0.01320529505610466, "signal/format_reward/group_std_mean": 0.02615750953555107, "signal/format_reward/group_zero_std_frac": 0.8916666746139527, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00660264752805233, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00660264752805233, "signal/mean_confidence_reward/centered_abs_mean": 0.06290501430630684, "signal/mean_confidence_reward/group_std_mean": 0.08351254016160965, "signal/mean_confidence_reward/group_zero_std_frac": 0.36666667461395264, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.290501346484234e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.290501346484234e-07, "step": 220 }, { "calibration/aurc": 0.12652826645345755, "calibration/batch_distribution_entropy": 0.46529389381214703, "calibration/batch_entropy_100bins": 0.26020387344041745, "calibration/batch_entropy_10bins": 0.46529389381214703, "calibration/batch_entropy_50bins": 0.30630778975065986, "calibration/batch_uniqueness": 0.23280162375100857, "calibration/confidence_entropy": 0.5795074087022364, "calibration/coverage@0%": 0.09057318935427575, "calibration/coverage@1%": 0.09580879144851659, "calibration/coverage@10%": 0.6117064790575916, "calibration/coverage@15%": 0.8069298038016214, "calibration/coverage@20%": 0.8168774477806788, "calibration/coverage@25%": 0.8611482811140121, "calibration/coverage@30%": 0.8611482811140121, "calibration/coverage@5%": 0.23067133980606855, "calibration/distribution_entropy_10": 0.46529389381214703, "calibration/distribution_entropy_100": 0.26020387344041745, "calibration/ece": 0.19566735272875574, "calibration/mean_confidence": 0.6187747341189015, "calibration/unique_confidence_per_question": 0.01979166666666667, "calibration/unique_confidences": 7.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.003993055555555558, "completions/max_length": 3369.0, "completions/max_terminated_length": 3369.0, "completions/mean_length": 886.9040893554687, "completions/mean_terminated_length": 890.4674682617188, "completions/min_length": 0.0, "completions/min_terminated_length": 221.8, "epoch": 0.5408653846153846, "grad_norm": 0.0006064812187105417, "learning_rate": 4.897836538461539e-06, "loss": -0.0029, "num_tokens": 476719620.0, "reward": 1.2701764583587647, "reward_std": 0.11144869178533554, "rewards/accuracy_reward": 0.720225703716278, "rewards/brier_reward": 0.8250629425048828, "rewards/confidence_one_or_zero": 0.0017361111240461469, "rewards/format_reward": 0.9950520753860473, "rewards/mean_confidence_reward": 0.604869794845581, "sampling/batch_mean_priority_error": 0.036432831587038625, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.6083333333333334, "sampling/error_ema_max": 0.12885397672653198, "sampling/error_ema_mean": 0.0060226328670978544, "sampling/priority_kl": 0.029999875649809837, "sampling/priority_scale": 0.799505728459917, "sampling/prob_entropy": 10.278953742980956, "sampling/prob_max": 4.055845565744676e-05, "sampling/prob_min": 1.685902716417331e-05, "sampling/prompt_draws_max": 4.0, "sampling/prompt_draws_mean": 0.5351999998092651, "sampling/prompt_draws_total": 16056.0, "sampling/seen_fraction": 0.45362000465393065, "sampling/unseen_fraction": 0.5463799953460693, "signal/accuracy_reward/centered_abs_mean": 0.12177191823720931, "signal/accuracy_reward/group_std_mean": 0.16531633734703063, "signal/accuracy_reward/group_zero_std_frac": 0.5138888955116272, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06088595911860466, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06088595911860466, "signal/advantage_abs_mean": 0.07888294160366058, "signal/advantage_pre_scale_abs_mean": 0.07888294160366058, "signal/advantage_pre_scale_std": 0.1528717279434204, "signal/advantage_std": 0.1528717279434204, "signal/brier_reward/centered_abs_mean": 0.07004087418317795, "signal/brier_reward/group_std_mean": 0.09422325640916825, "signal/brier_reward/group_zero_std_frac": 0.1722222238779068, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.035020437091588974, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.035020437091588974, "signal/confidence_one_or_zero/centered_abs_mean": 0.002734375, "signal/confidence_one_or_zero/group_std_mean": 0.004100846825167537, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9861111044883728, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 2.7343747888153302e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 2.7343747888153302e-08, "signal/format_reward/centered_abs_mean": 0.008696831669658423, "signal/format_reward/group_std_mean": 0.018851579166948795, "signal/format_reward/group_zero_std_frac": 0.9111111283302307, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.004348415834829211, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.004348415834829211, "signal/mean_confidence_reward/centered_abs_mean": 0.06936849504709244, "signal/mean_confidence_reward/group_std_mean": 0.09045136123895645, "signal/mean_confidence_reward/group_zero_std_frac": 0.18055555522441863, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.936849104022258e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.936849104022258e-07, "step": 225 }, { "calibration/aurc": 0.25964683947564937, "calibration/batch_distribution_entropy": 0.4298122178001658, "calibration/batch_entropy_100bins": 0.23594515591118478, "calibration/batch_entropy_10bins": 0.4298122178001658, "calibration/batch_entropy_50bins": 0.27775081997800827, "calibration/batch_uniqueness": 0.05135679139027812, "calibration/confidence_entropy": 0.5489465221626701, "calibration/coverage@0%": 0.15678039531122578, "calibration/coverage@1%": 0.15678039531122578, "calibration/coverage@10%": 0.18750956197789245, "calibration/coverage@15%": 0.2015720619778924, "calibration/coverage@20%": 0.4533445721373601, "calibration/coverage@25%": 0.6474226561268613, "calibration/coverage@30%": 0.6944200451608039, "calibration/coverage@5%": 0.15678039531122578, "calibration/distribution_entropy_10": 0.4298122178001658, "calibration/distribution_entropy_100": 0.23594515591118478, "calibration/ece": 0.13530504305269925, "calibration/mean_confidence": 0.6636817097652362, "calibration/unique_confidence_per_question": 0.021875, "calibration/unique_confidences": 8.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.005468749999999978, "completions/max_length": 2951.6, "completions/max_terminated_length": 2951.6, "completions/mean_length": 809.384814453125, "completions/mean_terminated_length": 813.767333984375, "completions/min_length": 0.0, "completions/min_terminated_length": 226.0, "epoch": 0.5528846153846154, "grad_norm": 0.0007383719203062356, "learning_rate": 4.867788461538462e-06, "loss": -0.0054, "num_tokens": 489174581.0, "reward": 1.2462389707565307, "reward_std": 0.13540728837251664, "rewards/accuracy_reward": 0.6755208253860474, "rewards/brier_reward": 0.8228465676307678, "rewards/confidence_one_or_zero": 0.0008680555794853718, "rewards/format_reward": 0.9940972208976746, "rewards/mean_confidence_reward": 0.6592578172683716, "sampling/batch_mean_priority_error": 0.045255449784256524, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.6194444444444444, "sampling/error_ema_max": 0.12885397672653198, "sampling/error_ema_mean": 0.006066837068647146, "sampling/priority_kl": 0.03000005558133125, "sampling/priority_scale": 0.793832951807417, "sampling/prob_entropy": 10.278955459594727, "sampling/prob_max": 4.067181362188421e-05, "sampling/prob_min": 1.701090259302873e-05, "sampling/prompt_draws_max": 4.0, "sampling/prompt_draws_mean": 0.5472000002861023, "sampling/prompt_draws_total": 16416.0, "sampling/seen_fraction": 0.46108666658401487, "sampling/unseen_fraction": 0.5389133334159851, "signal/accuracy_reward/centered_abs_mean": 0.1392578139901161, "signal/accuracy_reward/group_std_mean": 0.1852429300546646, "signal/accuracy_reward/group_zero_std_frac": 0.4666666746139526, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06962890699505805, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06962890699505805, "signal/advantage_abs_mean": 0.09845461696386337, "signal/advantage_pre_scale_abs_mean": 0.09845461696386337, "signal/advantage_pre_scale_std": 0.18213818967342377, "signal/advantage_std": 0.18213818967342377, "signal/brier_reward/centered_abs_mean": 0.08291573077440262, "signal/brier_reward/group_std_mean": 0.11135628968477249, "signal/brier_reward/group_zero_std_frac": 0.23888889253139495, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04145786538720131, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.04145786538720131, "signal/confidence_one_or_zero/centered_abs_mean": 0.0016276041395030915, "signal/confidence_one_or_zero/group_std_mean": 0.003662066720426083, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9833333253860473, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.6276040426532744e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.6276040426532744e-08, "signal/format_reward/centered_abs_mean": 0.010481770988553763, "signal/format_reward/group_std_mean": 0.022597668692469597, "signal/format_reward/group_zero_std_frac": 0.8944444537162781, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.005240885494276881, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.005240885494276881, "signal/mean_confidence_reward/centered_abs_mean": 0.06652046516537666, "signal/mean_confidence_reward/group_std_mean": 0.08856887370347977, "signal/mean_confidence_reward/group_zero_std_frac": 0.2944444417953491, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.652046408817114e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.652046408817114e-07, "step": 230 }, { "calibration/aurc": 0.21916975459389473, "calibration/batch_distribution_entropy": 0.36100771717627633, "calibration/batch_entropy_100bins": 0.19316641280299807, "calibration/batch_entropy_10bins": 0.36100771717627633, "calibration/batch_entropy_50bins": 0.2273923757453154, "calibration/batch_uniqueness": -0.18844872173518384, "calibration/confidence_entropy": 0.508086913449876, "calibration/coverage@0%": 0.013162141012056872, "calibration/coverage@1%": 0.013162141012056872, "calibration/coverage@10%": 0.04981135567174273, "calibration/coverage@15%": 0.2021673766141511, "calibration/coverage@20%": 0.2131621410120569, "calibration/coverage@25%": 0.7295691211841004, "calibration/coverage@30%": 0.9237121188512386, "calibration/coverage@5%": 0.04981135567174273, "calibration/distribution_entropy_10": 0.36100771717627633, "calibration/distribution_entropy_100": 0.19316641280299807, "calibration/ece": 0.06566668041868481, "calibration/mean_confidence": 0.7343678993951881, "calibration/unique_confidence_per_question": 0.020833333333333332, "calibration/unique_confidences": 8.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.004427083333333348, "completions/max_length": 3289.6, "completions/max_terminated_length": 3289.6, "completions/mean_length": 761.0531372070312, "completions/mean_terminated_length": 764.4302368164062, "completions/min_length": 0.0, "completions/min_terminated_length": 180.8, "epoch": 0.5649038461538461, "grad_norm": 0.0006777613307349384, "learning_rate": 4.837740384615385e-06, "loss": -0.0046, "num_tokens": 501045081.0, "reward": 1.2459373474121094, "reward_std": 0.15442993640899658, "rewards/accuracy_reward": 0.6864583373069764, "rewards/brier_reward": 0.8100889563560486, "rewards/confidence_one_or_zero": 0.0006076389050576836, "rewards/format_reward": 0.9953125, "rewards/mean_confidence_reward": 0.7428732514381409, "sampling/batch_mean_priority_error": 0.06387436928556019, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.6555555555555556, "sampling/error_ema_max": 0.12885397672653198, "sampling/error_ema_mean": 0.0061315404251217846, "sampling/priority_kl": 0.029999935254454612, "sampling/priority_scale": 0.7885626494651661, "sampling/prob_entropy": 10.278954887390137, "sampling/prob_max": 4.079045102116652e-05, "sampling/prob_min": 1.7144251614809036e-05, "sampling/prompt_draws_max": 4.0, "sampling/prompt_draws_mean": 0.5592000126838684, "sampling/prompt_draws_total": 16776.0, "sampling/seen_fraction": 0.4687666654586792, "sampling/unseen_fraction": 0.5312333345413208, "signal/accuracy_reward/centered_abs_mean": 0.1421875014901161, "signal/accuracy_reward/group_std_mean": 0.1907571256160736, "signal/accuracy_reward/group_zero_std_frac": 0.4444444477558136, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07109375074505805, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07109375074505805, "signal/advantage_abs_mean": 0.1121572047472, "signal/advantage_pre_scale_abs_mean": 0.1121572047472, "signal/advantage_pre_scale_std": 0.2046825647354126, "signal/advantage_std": 0.2046825647354126, "signal/brier_reward/centered_abs_mean": 0.09700408428907395, "signal/brier_reward/group_std_mean": 0.1282535523176193, "signal/brier_reward/group_zero_std_frac": 0.11111111119389534, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.048502042144536975, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.048502042144536975, "signal/confidence_one_or_zero/centered_abs_mean": 0.0010145399603061378, "signal/confidence_one_or_zero/group_std_mean": 0.0015925956889986993, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9944444417953491, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.01453991874223e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.01453991874223e-08, "signal/format_reward/centered_abs_mean": 0.008778211940079928, "signal/format_reward/group_std_mean": 0.021230710670351982, "signal/format_reward/group_zero_std_frac": 0.8944444537162781, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.004389105970039964, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.004389105970039964, "signal/mean_confidence_reward/centered_abs_mean": 0.062408319860696795, "signal/mean_confidence_reward/group_std_mean": 0.08640324026346206, "signal/mean_confidence_reward/group_zero_std_frac": 0.18055555671453477, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.240831908144173e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.240831908144173e-07, "step": 235 }, { "calibration/aurc": 0.1801804727272827, "calibration/batch_distribution_entropy": 0.44021960039486946, "calibration/batch_entropy_100bins": 0.23149426160615266, "calibration/batch_entropy_10bins": 0.44021960039486946, "calibration/batch_entropy_50bins": 0.27251129921699097, "calibration/batch_uniqueness": 0.07269222939658837, "calibration/confidence_entropy": 0.4649029441679257, "calibration/coverage@0%": 0.01994750656167979, "calibration/coverage@1%": 0.01994750656167979, "calibration/coverage@10%": 0.31052382105128745, "calibration/coverage@15%": 0.6252309032967183, "calibration/coverage@20%": 0.6445466126091638, "calibration/coverage@25%": 0.7958834692047656, "calibration/coverage@30%": 0.8183982329842932, "calibration/coverage@5%": 0.01994750656167979, "calibration/distribution_entropy_10": 0.44021960039486946, "calibration/distribution_entropy_100": 0.23149426160615266, "calibration/ece": 0.11495716548071525, "calibration/mean_confidence": 0.7526511497869869, "calibration/unique_confidence_per_question": 0.0203125, "calibration/unique_confidences": 7.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00477430555555558, "completions/max_length": 3072.4, "completions/max_terminated_length": 3072.4, "completions/mean_length": 743.3960205078125, "completions/mean_terminated_length": 746.9897094726563, "completions/min_length": 0.0, "completions/min_terminated_length": 147.6, "epoch": 0.5769230769230769, "grad_norm": 0.0007029211265034974, "learning_rate": 4.807692307692308e-06, "loss": -0.004, "num_tokens": 512686891.0, "reward": 1.2595340251922607, "reward_std": 0.14976598024368287, "rewards/accuracy_reward": 0.6979166626930237, "rewards/brier_reward": 0.8260842323303222, "rewards/confidence_one_or_zero": 0.0007812500058207661, "rewards/format_reward": 0.9950520873069764, "rewards/mean_confidence_reward": 0.7551562547683716, "sampling/batch_mean_priority_error": 0.06334802356864544, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.6611111111111111, "sampling/error_ema_max": 0.12885397672653198, "sampling/error_ema_mean": 0.006200295034795999, "sampling/priority_kl": 0.02999994494020939, "sampling/priority_scale": 0.7836340844398364, "sampling/prob_entropy": 10.278955841064453, "sampling/prob_max": 4.0912131953518835e-05, "sampling/prob_min": 1.7265845599467866e-05, "sampling/prompt_draws_max": 4.0, "sampling/prompt_draws_mean": 0.5711999893188476, "sampling/prompt_draws_total": 17136.0, "sampling/seen_fraction": 0.476526665687561, "sampling/unseen_fraction": 0.5234733343124389, "signal/accuracy_reward/centered_abs_mean": 0.12988281548023223, "signal/accuracy_reward/group_std_mean": 0.17232809364795684, "signal/accuracy_reward/group_zero_std_frac": 0.5055555522441864, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06494140774011611, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06494140774011611, "signal/advantage_abs_mean": 0.11209896206855774, "signal/advantage_pre_scale_abs_mean": 0.11209896206855774, "signal/advantage_pre_scale_std": 0.20345027148723602, "signal/advantage_std": 0.20345027148723602, "signal/brier_reward/centered_abs_mean": 0.10559439659118652, "signal/brier_reward/group_std_mean": 0.13607816100120546, "signal/brier_reward/group_zero_std_frac": 0.05000000149011612, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.05279719829559326, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.05279719829559326, "signal/confidence_one_or_zero/centered_abs_mean": 0.0014160156133584678, "signal/confidence_one_or_zero/group_std_mean": 0.0027380796149373055, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9888888835906983, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.4160154648834578e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.4160154648834578e-08, "signal/format_reward/centered_abs_mean": 0.00910915806889534, "signal/format_reward/group_std_mean": 0.020607628300786017, "signal/format_reward/group_zero_std_frac": 0.9027777910232544, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00455457903444767, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00455457903444767, "signal/mean_confidence_reward/centered_abs_mean": 0.07177354246377946, "signal/mean_confidence_reward/group_std_mean": 0.09573450088500976, "signal/mean_confidence_reward/group_zero_std_frac": 0.07500000149011612, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 7.177354063969688e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 7.177354063969688e-07, "step": 240 }, { "calibration/aurc": 0.11709602782683955, "calibration/batch_distribution_entropy": 0.5672294931403771, "calibration/batch_entropy_100bins": 0.3116243412218339, "calibration/batch_entropy_10bins": 0.5672294931403771, "calibration/batch_entropy_50bins": 0.36683913244674493, "calibration/batch_uniqueness": 0.28723508706075185, "calibration/confidence_entropy": 0.42654062776644575, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.6111260214833772, "calibration/coverage@15%": 0.7574644152004357, "calibration/coverage@20%": 0.8191817958759573, "calibration/coverage@25%": 0.8933654725177265, "calibration/coverage@30%": 0.9848563968668408, "calibration/coverage@5%": 0.08952879581151832, "calibration/distribution_entropy_10": 0.5672294931403771, "calibration/distribution_entropy_100": 0.3116243412218339, "calibration/ece": 0.058913449990573484, "calibration/mean_confidence": 0.7108805560567061, "calibration/unique_confidence_per_question": 0.025520833333333333, "calibration/unique_confidences": 9.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.004513888888888884, "completions/max_length": 2893.0, "completions/max_terminated_length": 2893.0, "completions/mean_length": 759.0476684570312, "completions/mean_terminated_length": 762.4732666015625, "completions/min_length": 0.0, "completions/min_terminated_length": 154.4, "epoch": 0.5889423076923077, "grad_norm": 0.0005396166816353798, "learning_rate": 4.777644230769231e-06, "loss": -0.0049, "num_tokens": 524523792.0, "reward": 1.2858156204223632, "reward_std": 0.13980449438095094, "rewards/accuracy_reward": 0.7180555462837219, "rewards/brier_reward": 0.8593769550323487, "rewards/confidence_one_or_zero": 0.0010416666802484543, "rewards/format_reward": 0.9941840291023254, "rewards/mean_confidence_reward": 0.7379383563995361, "sampling/batch_mean_priority_error": 0.0541321167838522, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.6694444444444445, "sampling/error_ema_max": 0.12885397672653198, "sampling/error_ema_mean": 0.006265456974506378, "sampling/priority_kl": 0.03000013791024685, "sampling/priority_scale": 0.7791448295814917, "sampling/prob_entropy": 10.278951454162598, "sampling/prob_max": 4.104044201085344e-05, "sampling/prob_min": 1.7405801918357612e-05, "sampling/prompt_draws_max": 4.0, "sampling/prompt_draws_mean": 0.5832000017166138, "sampling/prompt_draws_total": 17496.0, "sampling/seen_fraction": 0.48456000089645385, "sampling/unseen_fraction": 0.5154399991035461, "signal/accuracy_reward/centered_abs_mean": 0.12662760615348817, "signal/accuracy_reward/group_std_mean": 0.1733509123325348, "signal/accuracy_reward/group_zero_std_frac": 0.47777777910232544, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06331380307674409, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06331380307674409, "signal/advantage_abs_mean": 0.09974031746387482, "signal/advantage_pre_scale_abs_mean": 0.09974031746387482, "signal/advantage_pre_scale_std": 0.18898828327655792, "signal/advantage_std": 0.18898828327655792, "signal/brier_reward/centered_abs_mean": 0.0990310087800026, "signal/brier_reward/group_std_mean": 0.13221175074577332, "signal/brier_reward/group_zero_std_frac": 0.20277777761220933, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0495155043900013, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.0495155043900013, "signal/confidence_one_or_zero/centered_abs_mean": 0.0019531249883584679, "signal/confidence_one_or_zero/group_std_mean": 0.004861733969300985, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9749999880790711, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.953124808551365e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.953124808551365e-08, "signal/format_reward/centered_abs_mean": 0.010606553871184587, "signal/format_reward/group_std_mean": 0.02274163216352463, "signal/format_reward/group_zero_std_frac": 0.8972222328186035, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.005303276935592294, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.005303276935592294, "signal/mean_confidence_reward/centered_abs_mean": 0.08483045399188996, "signal/mean_confidence_reward/group_std_mean": 0.11241712123155594, "signal/mean_confidence_reward/group_zero_std_frac": 0.21388888657093047, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 8.48304478040518e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 8.48304478040518e-07, "step": 245 }, { "calibration/aurc": 0.1636056684986535, "calibration/batch_distribution_entropy": 0.6186128158819945, "calibration/batch_entropy_100bins": 0.3487107609376868, "calibration/batch_entropy_10bins": 0.6186128158819945, "calibration/batch_entropy_50bins": 0.4104966656830034, "calibration/batch_uniqueness": 0.429661900750044, "calibration/confidence_entropy": 0.45239855037732674, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.175, "calibration/coverage@10%": 0.40646542321116935, "calibration/coverage@15%": 0.47216677574171023, "calibration/coverage@20%": 0.5295729712041884, "calibration/coverage@25%": 0.73539212478185, "calibration/coverage@30%": 0.8873245757180157, "calibration/coverage@5%": 0.19947916666666668, "calibration/distribution_entropy_10": 0.6186128158819945, "calibration/distribution_entropy_100": 0.3487107609376868, "calibration/ece": 0.12899922009919845, "calibration/mean_confidence": 0.6672979726566239, "calibration/unique_confidence_per_question": 0.025, "calibration/unique_confidences": 9.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.004513888888888862, "completions/max_length": 3556.0, "completions/max_terminated_length": 3556.0, "completions/mean_length": 787.1635498046875, "completions/mean_terminated_length": 790.782568359375, "completions/min_length": 0.0, "completions/min_terminated_length": 153.4, "epoch": 0.6009615384615384, "grad_norm": 0.0005077553796581924, "learning_rate": 4.747596153846154e-06, "loss": -0.0035, "num_tokens": 536723724.0, "reward": 1.258297610282898, "reward_std": 0.13198113888502122, "rewards/accuracy_reward": 0.6806423544883728, "rewards/brier_reward": 0.8412350416183472, "rewards/confidence_one_or_zero": 0.0015625000465661286, "rewards/format_reward": 0.9947048664093018, "rewards/mean_confidence_reward": 0.6485720157623291, "sampling/batch_mean_priority_error": 0.05104182940510933, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.6361111111111112, "sampling/error_ema_max": 0.12885397672653198, "sampling/error_ema_mean": 0.006326197646558285, "sampling/priority_kl": 0.02999974898993969, "sampling/priority_scale": 0.774927383591421, "sampling/prob_entropy": 10.27895221710205, "sampling/prob_max": 4.1167728340951726e-05, "sampling/prob_min": 1.7496926739113404e-05, "sampling/prompt_draws_max": 4.0, "sampling/prompt_draws_mean": 0.5952000021934509, "sampling/prompt_draws_total": 17856.0, "sampling/seen_fraction": 0.492440003156662, "sampling/unseen_fraction": 0.507559996843338, "signal/accuracy_reward/centered_abs_mean": 0.1427029088139534, "signal/accuracy_reward/group_std_mean": 0.19304186701774598, "signal/accuracy_reward/group_zero_std_frac": 0.4277777850627899, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0713514544069767, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0713514544069767, "signal/advantage_abs_mean": 0.09508290439844132, "signal/advantage_pre_scale_abs_mean": 0.09508290439844132, "signal/advantage_pre_scale_std": 0.17090495228767394, "signal/advantage_std": 0.17090495228767394, "signal/brier_reward/centered_abs_mean": 0.10632044672966004, "signal/brier_reward/group_std_mean": 0.1394522801041603, "signal/brier_reward/group_zero_std_frac": 0.11388889029622078, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.05316022336483002, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.05316022336483002, "signal/confidence_one_or_zero/centered_abs_mean": 0.002875434048473835, "signal/confidence_one_or_zero/group_std_mean": 0.006179308518767357, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9722222208976745, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 2.87543400645518e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 2.87543400645518e-08, "signal/format_reward/centered_abs_mean": 0.008837890625, "signal/format_reward/group_std_mean": 0.016633689403533936, "signal/format_reward/group_zero_std_frac": 0.9305555820465088, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0044189453125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0044189453125, "signal/mean_confidence_reward/centered_abs_mean": 0.09797850847244263, "signal/mean_confidence_reward/group_std_mean": 0.12640962302684783, "signal/mean_confidence_reward/group_zero_std_frac": 0.1305555559694767, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 9.797850339055004e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 9.797850339055004e-07, "step": 250 }, { "epoch": 0.6009615384615384, "eval_calibration/aurc": 0.14130535061583444, "eval_calibration/batch_distribution_entropy": 0.6530374121770919, "eval_calibration/batch_entropy_100bins": 0.36960797941874046, "eval_calibration/batch_entropy_10bins": 0.6530374121770919, "eval_calibration/batch_entropy_50bins": 0.4350965331647375, "eval_calibration/batch_uniqueness": 0.47262016049727446, "eval_calibration/confidence_entropy": 0.45554960791222726, "eval_calibration/coverage@0%": 0.0, "eval_calibration/coverage@1%": 0.0, "eval_calibration/coverage@10%": 0.4520905923344948, "eval_calibration/coverage@15%": 0.6890243902439024, "eval_calibration/coverage@20%": 0.7813588850174216, "eval_calibration/coverage@25%": 0.9059233449477352, "eval_calibration/coverage@30%": 1.0, "eval_calibration/coverage@5%": 0.0, "eval_calibration/distribution_entropy_10": 0.6530374121770919, "eval_calibration/distribution_entropy_100": 0.36960797941874046, "eval_calibration/ece": 0.03135888501742151, "eval_calibration/mean_confidence": 0.6818815331010453, "eval_calibration/unique_confidence_per_question": 0.009548611111111112, "eval_calibration/unique_confidences": 11, "eval_completions/clipped_ratio": 0.0034722222222222467, "eval_completions/max_length": 2078.3333333333335, "eval_completions/max_terminated_length": 2078.3333333333335, "eval_completions/mean_length": 772.7741495768229, "eval_completions/mean_terminated_length": 775.481201171875, "eval_completions/min_length": 95.66666666666667, "eval_completions/min_terminated_length": 241.0, "eval_loss": 0.0, "eval_num_tokens": 536723724.0, "eval_reward": 1.2739282846450806, "eval_reward_std": 0.29537801444530487, "eval_rewards/accuracy_reward": 0.7022569477558136, "eval_rewards/brier_reward": 0.8490581413110098, "eval_rewards/confidence_one_or_zero": 0.0008680555814256271, "eval_rewards/format_reward": 0.9965277910232544, "eval_rewards/mean_confidence_reward": 0.6795138816038767, "eval_runtime": 173.6277, "eval_samples_per_second": 5.759, "eval_signal/accuracy_reward/centered_abs_mean": 0.4012044270833333, "eval_signal/accuracy_reward/group_std_mean": 0.453156977891922, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.20060221354166666, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.20060221354166666, "eval_signal/advantage_abs_mean": 0.25084766497214633, "eval_signal/advantage_pre_scale_abs_mean": 0.25084766497214633, "eval_signal/advantage_pre_scale_std": 0.29384053746859234, "eval_signal/advantage_std": 0.29384053746859234, "eval_signal/brier_reward/centered_abs_mean": 0.17003757506608963, "eval_signal/brier_reward/group_std_mean": 0.22511051098505655, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.08501878753304482, "eval_signal/brier_reward/weight": 0.5, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.08501878753304482, "eval_signal/confidence_one_or_zero/centered_abs_mean": 0.0016818575871487458, "eval_signal/confidence_one_or_zero/group_std_mean": 0.0049104637776811915, "eval_signal/confidence_one_or_zero/group_zero_std_frac": 0.9722222288449606, "eval_signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.6818574977151002e-08, "eval_signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "eval_signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.6818574977151002e-08, "eval_signal/format_reward/centered_abs_mean": 0.006727430348594983, "eval_signal/format_reward/group_std_mean": 0.019641855110724766, "eval_signal/format_reward/group_zero_std_frac": 0.8888889153798422, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0033637151742974916, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0033637151742974916, "eval_signal/mean_confidence_reward/centered_abs_mean": 0.23637964576482773, "eval_signal/mean_confidence_reward/group_std_mean": 0.27153053879737854, "eval_signal/mean_confidence_reward/group_zero_std_frac": 0.0, "eval_signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 2.363796359835154e-06, "eval_signal/mean_confidence_reward/weight": 9.999999747378752e-06, "eval_signal/mean_confidence_reward/weighted_centered_abs_mean": 2.363796359835154e-06, "eval_steps_per_second": 0.035, "step": 250 }, { "epoch": 0.6009615384615384, "step": 250, "train_probe_calibration/aurc": 0.12413227849884392, "train_probe_calibration/batch_distribution_entropy": 0.6596172202182647, "train_probe_calibration/batch_entropy_100bins": 0.3680004020914199, "train_probe_calibration/batch_entropy_10bins": 0.6596172202182647, "train_probe_calibration/batch_entropy_50bins": 0.43320411914539897, "train_probe_calibration/batch_uniqueness": 0.45417553734211713, "train_probe_calibration/confidence_entropy": 0.4511331460653976, "train_probe_calibration/coverage@0%": 0.0, "train_probe_calibration/coverage@1%": 0.0, "train_probe_calibration/coverage@10%": 0.46596858638743455, "train_probe_calibration/coverage@15%": 0.7390924956369983, "train_probe_calibration/coverage@20%": 0.7914485165794066, "train_probe_calibration/coverage@25%": 0.9685863874345549, "train_probe_calibration/coverage@30%": 1.0, "train_probe_calibration/coverage@5%": 0.0, "train_probe_calibration/distribution_entropy_10": 0.6596172202182647, "train_probe_calibration/distribution_entropy_100": 0.3680004020914199, "train_probe_calibration/ece": 0.04528795811518314, "train_probe_calibration/mean_confidence": 0.6881326352530541, "train_probe_calibration/unique_confidence_per_question": 0.010416666666666666, "train_probe_calibration/unique_confidences": 12, "train_probe_completions/clipped_ratio": 0.008506944444444461, "train_probe_completions/max_length": 2231.0, "train_probe_completions/max_terminated_length": 2231.0, "train_probe_completions/mean_length": 773.5767618815104, "train_probe_completions/mean_terminated_length": 780.2432556152344, "train_probe_completions/min_length": 30.166666666666668, "train_probe_completions/min_terminated_length": 195.83333333333334, "train_probe_loss": 0.0, "train_probe_num_tokens": 536723724.0, "train_probe_reward": 1.287823756535848, "train_probe_reward_std": 0.293773889541626, "train_probe_rewards/accuracy_reward": 0.7291666467984518, "train_probe_rewards/brier_reward": 0.8516753911972046, "train_probe_rewards/confidence_one_or_zero": 0.0017361111628512542, "train_probe_rewards/format_reward": 0.9947916766007742, "train_probe_rewards/mean_confidence_reward": 0.6845485866069794, "train_probe_runtime": 186.6636, "train_probe_samples_per_second": 5.357, "train_probe_signal/accuracy_reward/centered_abs_mean": 0.3862847139437993, "train_probe_signal/accuracy_reward/group_std_mean": 0.44479583700497943, "train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0, "train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.19314235697189966, "train_probe_signal/accuracy_reward/weight": 0.5, "train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.19314235697189966, "train_probe_signal/advantage_abs_mean": 0.24279569586118063, "train_probe_signal/advantage_pre_scale_abs_mean": 0.24279569586118063, "train_probe_signal/advantage_pre_scale_std": 0.292436346411705, "train_probe_signal/advantage_std": 0.292436346411705, "train_probe_signal/brier_reward/centered_abs_mean": 0.1708850140372912, "train_probe_signal/brier_reward/group_std_mean": 0.2263486310839653, "train_probe_signal/brier_reward/group_zero_std_frac": 0.0, "train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0854425070186456, "train_probe_signal/brier_reward/weight": 0.5, "train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.0854425070186456, "train_probe_signal/confidence_one_or_zero/centered_abs_mean": 0.0033637151742974916, "train_probe_signal/confidence_one_or_zero/group_std_mean": 0.009820927555362383, "train_probe_signal/confidence_one_or_zero/group_zero_std_frac": 0.944444457689921, "train_probe_signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 3.3637149954302004e-08, "train_probe_signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "train_probe_signal/confidence_one_or_zero/weighted_centered_abs_mean": 3.3637149954302004e-08, "train_probe_signal/format_reward/centered_abs_mean": 0.010091145522892475, "train_probe_signal/format_reward/group_std_mean": 0.02946278266608715, "train_probe_signal/format_reward/group_zero_std_frac": 0.8333333631356558, "train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0050455727614462376, "train_probe_signal/format_reward/weight": 0.5, "train_probe_signal/format_reward/weighted_centered_abs_mean": 0.0050455727614462376, "train_probe_signal/mean_confidence_reward/centered_abs_mean": 0.23532441755135855, "train_probe_signal/mean_confidence_reward/group_std_mean": 0.27096671362717945, "train_probe_signal/mean_confidence_reward/group_zero_std_frac": 0.0, "train_probe_signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 2.3532442128271214e-06, "train_probe_signal/mean_confidence_reward/weight": 9.999999747378752e-06, "train_probe_signal/mean_confidence_reward/weighted_centered_abs_mean": 2.3532442128271214e-06, "train_probe_steps_per_second": 0.032 }, { "calibration/aurc": 0.10192401164575704, "calibration/batch_distribution_entropy": 0.5630897990918653, "calibration/batch_entropy_100bins": 0.3115759678912134, "calibration/batch_entropy_10bins": 0.5630897990918653, "calibration/batch_entropy_50bins": 0.3667821881446125, "calibration/batch_uniqueness": 0.2728590050713087, "calibration/confidence_entropy": 0.43422457263377695, "calibration/coverage@0%": 0.1171875, "calibration/coverage@1%": 0.3534325787401575, "calibration/coverage@10%": 0.6791543635170604, "calibration/coverage@15%": 0.7370529855643044, "calibration/coverage@20%": 0.7505946522309712, "calibration/coverage@25%": 0.8765625, "calibration/coverage@30%": 0.921875, "calibration/coverage@5%": 0.4003075787401575, "calibration/distribution_entropy_10": 0.5630897990918653, "calibration/distribution_entropy_100": 0.3115759678912134, "calibration/ece": 0.12341678969816257, "calibration/mean_confidence": 0.7173136072834646, "calibration/unique_confidence_per_question": 0.025520833333333333, "calibration/unique_confidences": 9.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.002517361111111116, "completions/max_length": 3401.0, "completions/max_terminated_length": 3401.0, "completions/mean_length": 781.3870727539063, "completions/mean_terminated_length": 783.37958984375, "completions/min_length": 0.0, "completions/min_terminated_length": 177.2, "epoch": 0.6129807692307693, "grad_norm": 0.0004924332606606185, "learning_rate": 4.7175480769230775e-06, "loss": -0.0004, "num_tokens": 548826199.0, "reward": 1.2913060426712035, "reward_std": 0.12112262547016144, "rewards/accuracy_reward": 0.7307291626930237, "rewards/brier_reward": 0.8547330856323242, "rewards/confidence_one_or_zero": 0.0016493055794853718, "rewards/format_reward": 0.9971354126930236, "rewards/mean_confidence_reward": 0.7186979174613952, "sampling/batch_mean_priority_error": 0.0507233970853445, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.6305555555555555, "sampling/error_ema_max": 0.12311078608036041, "sampling/error_ema_mean": 0.006377698108553887, "sampling/priority_kl": 0.029999877139925957, "sampling/priority_scale": 0.7706882416969165, "sampling/prob_entropy": 10.278953361511231, "sampling/prob_max": 4.12878165661823e-05, "sampling/prob_min": 1.7625923283048907e-05, "sampling/prompt_draws_max": 4.0, "sampling/prompt_draws_mean": 0.6072000026702881, "sampling/prompt_draws_total": 18216.0, "sampling/seen_fraction": 0.4998066782951355, "sampling/unseen_fraction": 0.5001933217048645, "signal/accuracy_reward/centered_abs_mean": 0.12176649272441864, "signal/accuracy_reward/group_std_mean": 0.1668856054544449, "signal/accuracy_reward/group_zero_std_frac": 0.5055555701255798, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06088324636220932, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06088324636220932, "signal/advantage_abs_mean": 0.08548608571290969, "signal/advantage_pre_scale_abs_mean": 0.08548608571290969, "signal/advantage_pre_scale_std": 0.1691525399684906, "signal/advantage_std": 0.1691525399684906, "signal/brier_reward/centered_abs_mean": 0.0856711134314537, "signal/brier_reward/group_std_mean": 0.11683738827705384, "signal/brier_reward/group_zero_std_frac": 0.18611111640930175, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04283555671572685, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.04283555671572685, "signal/confidence_one_or_zero/centered_abs_mean": 0.002620442712213844, "signal/confidence_one_or_zero/group_std_mean": 0.004098849650472402, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9861111044883728, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 2.6204425296327827e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 2.6204425296327827e-08, "signal/format_reward/centered_abs_mean": 0.00539822056889534, "signal/format_reward/group_std_mean": 0.013028488121926784, "signal/format_reward/group_zero_std_frac": 0.9361111044883728, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00269911028444767, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00269911028444767, "signal/mean_confidence_reward/centered_abs_mean": 0.07201225459575653, "signal/mean_confidence_reward/group_std_mean": 0.09611042886972428, "signal/mean_confidence_reward/group_zero_std_frac": 0.21111111342906952, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 7.201225457720284e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 7.201225457720284e-07, "step": 255 }, { "calibration/aurc": 0.14828991525526353, "calibration/batch_distribution_entropy": 0.44975362442630307, "calibration/batch_entropy_100bins": 0.24745196947920672, "calibration/batch_entropy_10bins": 0.44975362442630307, "calibration/batch_entropy_50bins": 0.29129645473160004, "calibration/batch_uniqueness": 0.03987591217030008, "calibration/confidence_entropy": 0.4098157739993427, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.42926183660909684, "calibration/coverage@15%": 0.6075963331944033, "calibration/coverage@20%": 0.6421161978926266, "calibration/coverage@25%": 0.9016763637819739, "calibration/coverage@30%": 0.9607803963983823, "calibration/coverage@5%": 0.1234375, "calibration/distribution_entropy_10": 0.44975362442630307, "calibration/distribution_entropy_100": 0.24745196947920672, "calibration/ece": 0.0922259926249394, "calibration/mean_confidence": 0.767020542663863, "calibration/unique_confidence_per_question": 0.0203125, "calibration/unique_confidences": 7.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.002690972222222232, "completions/max_length": 2928.2, "completions/max_terminated_length": 2928.2, "completions/mean_length": 778.8683227539062, "completions/mean_terminated_length": 780.974951171875, "completions/min_length": 0.0, "completions/min_terminated_length": 180.4, "epoch": 0.625, "grad_norm": 0.0004822597838938236, "learning_rate": 4.6875000000000004e-06, "loss": -0.0008, "num_tokens": 560880266.0, "reward": 1.300320601463318, "reward_std": 0.1322122886776924, "rewards/accuracy_reward": 0.7479166626930237, "rewards/brier_reward": 0.8555737972259522, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9971354246139527, "rewards/mean_confidence_reward": 0.7657639026641846, "sampling/batch_mean_priority_error": 0.05647566379535647, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.6083333333333334, "sampling/error_ema_max": 0.11928199231624603, "sampling/error_ema_mean": 0.0064342057332396506, "sampling/priority_kl": 0.0299997441470623, "sampling/priority_scale": 0.7668565093772486, "sampling/prob_entropy": 10.278952407836915, "sampling/prob_max": 4.1415702435187994e-05, "sampling/prob_min": 1.775550190359354e-05, "sampling/prompt_draws_max": 4.0, "sampling/prompt_draws_mean": 0.6192000031471252, "sampling/prompt_draws_total": 18576.0, "sampling/seen_fraction": 0.5074999928474426, "sampling/unseen_fraction": 0.4925000071525574, "signal/accuracy_reward/centered_abs_mean": 0.13364800214767455, "signal/accuracy_reward/group_std_mean": 0.17856489717960358, "signal/accuracy_reward/group_zero_std_frac": 0.4833333432674408, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06682400107383728, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06682400107383728, "signal/advantage_abs_mean": 0.09538109004497528, "signal/advantage_pre_scale_abs_mean": 0.09538109004497528, "signal/advantage_pre_scale_std": 0.18343801498413087, "signal/advantage_std": 0.18343801498413087, "signal/brier_reward/centered_abs_mean": 0.08802536427974701, "signal/brier_reward/group_std_mean": 0.12009987086057664, "signal/brier_reward/group_zero_std_frac": 0.2805555522441864, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.044012682139873506, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.044012682139873506, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.005289713619276881, "signal/format_reward/group_std_mean": 0.011597984097898006, "signal/format_reward/group_zero_std_frac": 0.9472222208976746, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0026448568096384406, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0026448568096384406, "signal/mean_confidence_reward/centered_abs_mean": 0.06126871481537819, "signal/mean_confidence_reward/group_std_mean": 0.0842192992568016, "signal/mean_confidence_reward/group_zero_std_frac": 0.3111111164093018, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.126871085143648e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.126871085143648e-07, "step": 260 }, { "calibration/aurc": 0.17156296594583784, "calibration/batch_distribution_entropy": 0.5713246895783481, "calibration/batch_entropy_100bins": 0.3150922720135943, "calibration/batch_entropy_10bins": 0.5713246895783481, "calibration/batch_entropy_50bins": 0.3709215244641552, "calibration/batch_uniqueness": 0.3472916017679485, "calibration/confidence_entropy": 0.4582822522468408, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.39228697203717355, "calibration/coverage@15%": 0.6017746302279601, "calibration/coverage@20%": 0.6673629242819843, "calibration/coverage@25%": 0.7258485639686685, "calibration/coverage@30%": 0.7550913838120105, "calibration/coverage@5%": 0.12189973614775726, "calibration/distribution_entropy_10": 0.5713246895783481, "calibration/distribution_entropy_100": 0.3150922720135943, "calibration/ece": 0.10203320370357609, "calibration/mean_confidence": 0.7285038118209939, "calibration/unique_confidence_per_question": 0.0203125, "calibration/unique_confidences": 7.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.003993055555555536, "completions/max_length": 2905.8, "completions/max_terminated_length": 2905.8, "completions/mean_length": 828.4642456054687, "completions/mean_terminated_length": 831.7786865234375, "completions/min_length": 0.0, "completions/min_terminated_length": 219.4, "epoch": 0.6370192307692307, "grad_norm": 0.0005070755141787231, "learning_rate": 4.657451923076923e-06, "loss": -0.0038, "num_tokens": 573521518.0, "reward": 1.2752347469329834, "reward_std": 0.12059136927127838, "rewards/accuracy_reward": 0.7099826455116272, "rewards/brier_reward": 0.8448992967605591, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9955729126930237, "rewards/mean_confidence_reward": 0.73536456823349, "sampling/batch_mean_priority_error": 0.053314529740651026, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.6416666666666666, "sampling/error_ema_max": 0.11928199231624603, "sampling/error_ema_mean": 0.006500357948243618, "sampling/priority_kl": 0.030000258609652518, "sampling/priority_scale": 0.7631947935326024, "sampling/prob_entropy": 10.278952407836915, "sampling/prob_max": 4.154189373366535e-05, "sampling/prob_min": 1.7881768508232197e-05, "sampling/prompt_draws_max": 4.0, "sampling/prompt_draws_mean": 0.6311999917030334, "sampling/prompt_draws_total": 18936.0, "sampling/seen_fraction": 0.5149933338165283, "sampling/unseen_fraction": 0.48500666618347166, "signal/accuracy_reward/centered_abs_mean": 0.1154568150639534, "signal/accuracy_reward/group_std_mean": 0.1554502695798874, "signal/accuracy_reward/group_zero_std_frac": 0.55, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0577284075319767, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0577284075319767, "signal/advantage_abs_mean": 0.08649851530790328, "signal/advantage_pre_scale_abs_mean": 0.08649851530790328, "signal/advantage_pre_scale_std": 0.1742924690246582, "signal/advantage_std": 0.1742924690246582, "signal/brier_reward/centered_abs_mean": 0.08069736361503602, "signal/brier_reward/group_std_mean": 0.10713728815317154, "signal/brier_reward/group_zero_std_frac": 0.21944445073604585, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04034868180751801, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.04034868180751801, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.008132595662027597, "signal/format_reward/group_std_mean": 0.017994208075106145, "signal/format_reward/group_zero_std_frac": 0.9166666626930237, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0040662978310137985, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0040662978310137985, "signal/mean_confidence_reward/centered_abs_mean": 0.06501844376325608, "signal/mean_confidence_reward/group_std_mean": 0.08517636507749557, "signal/mean_confidence_reward/group_zero_std_frac": 0.24166666567325593, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.501843927253504e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.501843927253504e-07, "step": 265 }, { "calibration/aurc": 0.1724353102680505, "calibration/batch_distribution_entropy": 0.6705310899852044, "calibration/batch_entropy_100bins": 0.36594505639391034, "calibration/batch_entropy_10bins": 0.6705310899852044, "calibration/batch_entropy_50bins": 0.43078459944550557, "calibration/batch_uniqueness": 0.5443845630429488, "calibration/confidence_entropy": 0.5165166687743069, "calibration/coverage@0%": 0.023097112860892388, "calibration/coverage@1%": 0.023097112860892388, "calibration/coverage@10%": 0.4130767230293049, "calibration/coverage@15%": 0.5197839987121903, "calibration/coverage@20%": 0.6050889263380436, "calibration/coverage@25%": 0.6672000011857372, "calibration/coverage@30%": 0.8000747076369044, "calibration/coverage@5%": 0.14690370734908137, "calibration/distribution_entropy_10": 0.6705310899852044, "calibration/distribution_entropy_100": 0.36594505639391034, "calibration/ece": 0.11203265838968064, "calibration/mean_confidence": 0.6719259889611031, "calibration/unique_confidence_per_question": 0.019791666666666666, "calibration/unique_confidences": 7.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.005555555555555558, "completions/max_length": 3264.8, "completions/max_terminated_length": 3264.8, "completions/mean_length": 870.48125, "completions/mean_terminated_length": 875.4301147460938, "completions/min_length": 0.0, "completions/min_terminated_length": 274.8, "epoch": 0.6490384615384616, "grad_norm": 0.0004944394459016621, "learning_rate": 4.627403846153847e-06, "loss": -0.0045, "num_tokens": 586663670.0, "reward": 1.2752594470977783, "reward_std": 0.1263850063085556, "rewards/accuracy_reward": 0.7150173544883728, "rewards/brier_reward": 0.8412169933319091, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9942708253860474, "rewards/mean_confidence_reward": 0.6813281416893006, "sampling/batch_mean_priority_error": 0.04256142612508279, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.5972222222222222, "sampling/error_ema_max": 0.11928199231624603, "sampling/error_ema_mean": 0.006547117419540882, "sampling/priority_kl": 0.029999774321913718, "sampling/priority_scale": 0.7595782100455836, "sampling/prob_entropy": 10.278951644897461, "sampling/prob_max": 4.1664379386929797e-05, "sampling/prob_min": 1.800627251213882e-05, "sampling/prompt_draws_max": 4.0, "sampling/prompt_draws_mean": 0.6432000041007996, "sampling/prompt_draws_total": 19296.0, "sampling/seen_fraction": 0.5221800208091736, "sampling/unseen_fraction": 0.4778199791908264, "signal/accuracy_reward/centered_abs_mean": 0.12918294370174407, "signal/accuracy_reward/group_std_mean": 0.1769692748785019, "signal/accuracy_reward/group_zero_std_frac": 0.46666666865348816, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06459147185087204, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06459147185087204, "signal/advantage_abs_mean": 0.08929220288991928, "signal/advantage_pre_scale_abs_mean": 0.08929220288991928, "signal/advantage_pre_scale_std": 0.17134988605976104, "signal/advantage_std": 0.17134988605976104, "signal/brier_reward/centered_abs_mean": 0.07582532465457917, "signal/brier_reward/group_std_mean": 0.10502927154302596, "signal/brier_reward/group_zero_std_frac": 0.05833333525806665, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.037912662327289584, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.037912662327289584, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.01001519076526165, "signal/format_reward/group_std_mean": 0.020045911055058242, "signal/format_reward/group_zero_std_frac": 0.9138888955116272, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.005007595382630825, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.005007595382630825, "signal/mean_confidence_reward/centered_abs_mean": 0.0677403338253498, "signal/mean_confidence_reward/group_std_mean": 0.08821502774953842, "signal/mean_confidence_reward/group_zero_std_frac": 0.06944444570690393, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.774032954126597e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.774032954126597e-07, "step": 270 }, { "calibration/aurc": 0.20918962012232867, "calibration/batch_distribution_entropy": 0.6260484810131134, "calibration/batch_entropy_100bins": 0.3459900673899717, "calibration/batch_entropy_10bins": 0.6260484810131134, "calibration/batch_entropy_50bins": 0.40729390925908593, "calibration/batch_uniqueness": 0.49000524103582255, "calibration/confidence_entropy": 0.4940136629397375, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.13594836422061554, "calibration/coverage@15%": 0.39209262202554085, "calibration/coverage@20%": 0.6095946243339592, "calibration/coverage@25%": 0.6824626807529294, "calibration/coverage@30%": 0.8821657109615225, "calibration/coverage@5%": 0.13594836422061554, "calibration/distribution_entropy_10": 0.6260484810131134, "calibration/distribution_entropy_100": 0.3459900673899717, "calibration/ece": 0.11715939758055464, "calibration/mean_confidence": 0.7095371226439267, "calibration/unique_confidence_per_question": 0.0203125, "calibration/unique_confidences": 7.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.005642361111111116, "completions/max_length": 2745.8, "completions/max_terminated_length": 2745.8, "completions/mean_length": 861.8537353515625, "completions/mean_terminated_length": 866.7883422851562, "completions/min_length": 0.0, "completions/min_terminated_length": 203.4, "epoch": 0.6610576923076923, "grad_norm": 0.0007629328756593168, "learning_rate": 4.597355769230769e-06, "loss": -0.0056, "num_tokens": 599688769.0, "reward": 1.2769941091537476, "reward_std": 0.12798523157835007, "rewards/accuracy_reward": 0.7092013955116272, "rewards/brier_reward": 0.8505017399787903, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9942708373069763, "rewards/mean_confidence_reward": 0.716163182258606, "sampling/batch_mean_priority_error": 0.03905228317416913, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.6000000000000001, "sampling/error_ema_max": 0.119870525598526, "sampling/error_ema_mean": 0.006589326169341802, "sampling/priority_kl": 0.030000057816505433, "sampling/priority_scale": 0.7562616884941236, "sampling/prob_entropy": 10.278950500488282, "sampling/prob_max": 4.179128518444486e-05, "sampling/prob_min": 1.8127395378542133e-05, "sampling/prompt_draws_max": 4.0, "sampling/prompt_draws_mean": 0.6552000045776367, "sampling/prompt_draws_total": 19656.0, "sampling/seen_fraction": 0.5294933319091797, "sampling/unseen_fraction": 0.4705066680908203, "signal/accuracy_reward/centered_abs_mean": 0.11976996660232545, "signal/accuracy_reward/group_std_mean": 0.16761533915996552, "signal/accuracy_reward/group_zero_std_frac": 0.4888889014720917, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05988498330116272, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05988498330116272, "signal/advantage_abs_mean": 0.08943271785974502, "signal/advantage_pre_scale_abs_mean": 0.08943271785974502, "signal/advantage_pre_scale_std": 0.1768662929534912, "signal/advantage_std": 0.1768662929534912, "signal/brier_reward/centered_abs_mean": 0.07352758347988128, "signal/brier_reward/group_std_mean": 0.10236799418926239, "signal/brier_reward/group_zero_std_frac": 0.08611111268401146, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03676379173994064, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03676379173994064, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.01022135429084301, "signal/format_reward/group_std_mean": 0.020866769552230834, "signal/format_reward/group_zero_std_frac": 0.9083333492279053, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.005110677145421505, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.005110677145421505, "signal/mean_confidence_reward/centered_abs_mean": 0.06516871526837349, "signal/mean_confidence_reward/group_std_mean": 0.08634473383426666, "signal/mean_confidence_reward/group_zero_std_frac": 0.0888888917863369, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.516871280837222e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.516871280837222e-07, "step": 275 }, { "calibration/aurc": 0.14039470835629952, "calibration/batch_distribution_entropy": 0.6472011937223521, "calibration/batch_entropy_100bins": 0.36074211480402196, "calibration/batch_entropy_10bins": 0.6472011937223521, "calibration/batch_entropy_50bins": 0.42465978078877853, "calibration/batch_uniqueness": 0.5245013563165913, "calibration/confidence_entropy": 0.49849425976719336, "calibration/coverage@0%": 0.06307277628032346, "calibration/coverage@1%": 0.14932614555256066, "calibration/coverage@10%": 0.42254102650494163, "calibration/coverage@15%": 0.6666587476947085, "calibration/coverage@20%": 0.7297859312195583, "calibration/coverage@25%": 0.7877999473920652, "calibration/coverage@30%": 0.8885936267555682, "calibration/coverage@5%": 0.2825934973045822, "calibration/distribution_entropy_10": 0.6472011937223521, "calibration/distribution_entropy_100": 0.36074211480402196, "calibration/ece": 0.10694779631862672, "calibration/mean_confidence": 0.6907570665460823, "calibration/unique_confidence_per_question": 0.020312499999999997, "calibration/unique_confidences": 7.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010069444444444442, "completions/max_length": 3203.6, "completions/max_terminated_length": 3203.6, "completions/mean_length": 870.253662109375, "completions/mean_terminated_length": 879.0881958007812, "completions/min_length": 0.0, "completions/min_terminated_length": 217.2, "epoch": 0.6730769230769231, "grad_norm": 0.0005508947651833296, "learning_rate": 4.567307692307692e-06, "loss": -0.0091, "num_tokens": 612789739.0, "reward": 1.26469144821167, "reward_std": 0.14498854875564576, "rewards/accuracy_reward": 0.6958333373069763, "rewards/brier_reward": 0.8436918377876281, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.98984375, "rewards/mean_confidence_reward": 0.6993142485618591, "sampling/batch_mean_priority_error": 0.03846872324937353, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.6222222222222221, "sampling/error_ema_max": 0.12075332552194595, "sampling/error_ema_mean": 0.006628950219601392, "sampling/priority_kl": 0.029999958351254463, "sampling/priority_scale": 0.7531239926582203, "sampling/prob_entropy": 10.27894992828369, "sampling/prob_max": 4.1917922499123963e-05, "sampling/prob_min": 1.824544197006617e-05, "sampling/prompt_draws_max": 4.0, "sampling/prompt_draws_mean": 0.6672000050544739, "sampling/prompt_draws_total": 20016.0, "sampling/seen_fraction": 0.5366933345794678, "sampling/unseen_fraction": 0.4633066654205322, "signal/accuracy_reward/centered_abs_mean": 0.1374348968267441, "signal/accuracy_reward/group_std_mean": 0.1876349091529846, "signal/accuracy_reward/group_zero_std_frac": 0.4444444477558136, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06871744841337205, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06871744841337205, "signal/advantage_abs_mean": 0.10304587036371231, "signal/advantage_pre_scale_abs_mean": 0.10304587036371231, "signal/advantage_pre_scale_std": 0.19462815523147584, "signal/advantage_std": 0.19462815523147584, "signal/brier_reward/centered_abs_mean": 0.08086232841014862, "signal/brier_reward/group_std_mean": 0.11166905611753464, "signal/brier_reward/group_zero_std_frac": 0.08333333507180214, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04043116420507431, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.04043116420507431, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.01641167551279068, "signal/format_reward/group_std_mean": 0.029581551998853685, "signal/format_reward/group_zero_std_frac": 0.8805555582046509, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00820583775639534, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00820583775639534, "signal/mean_confidence_reward/centered_abs_mean": 0.06651312336325646, "signal/mean_confidence_reward/group_std_mean": 0.08824529945850372, "signal/mean_confidence_reward/group_zero_std_frac": 0.08888889104127884, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.65131210553227e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.65131210553227e-07, "step": 280 }, { "calibration/aurc": 0.14288680384347863, "calibration/batch_distribution_entropy": 0.6650315258167521, "calibration/batch_entropy_100bins": 0.37373616932584974, "calibration/batch_entropy_10bins": 0.6650315258167521, "calibration/batch_entropy_50bins": 0.4399561715298336, "calibration/batch_uniqueness": 0.5287880229643276, "calibration/confidence_entropy": 0.5082635866477374, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.10548302872062662, "calibration/coverage@10%": 0.4817853906989272, "calibration/coverage@15%": 0.7174201159144425, "calibration/coverage@20%": 0.7431254323607392, "calibration/coverage@25%": 0.8367832665460192, "calibration/coverage@30%": 0.8789591695119057, "calibration/coverage@5%": 0.20065376945375304, "calibration/distribution_entropy_10": 0.6650315258167521, "calibration/distribution_entropy_100": 0.37373616932584974, "calibration/ece": 0.09754575100242655, "calibration/mean_confidence": 0.6825601722922887, "calibration/unique_confidence_per_question": 0.021354166666666667, "calibration/unique_confidences": 8.2, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02152777777777779, "completions/max_length": 3658.2, "completions/max_terminated_length": 3658.2, "completions/mean_length": 883.3541870117188, "completions/mean_terminated_length": 902.7584106445313, "completions/min_length": 0.0, "completions/min_terminated_length": 198.8, "epoch": 0.6850961538461539, "grad_norm": 0.034632738679647446, "learning_rate": 4.537259615384616e-06, "loss": -0.0225, "num_tokens": 626066267.0, "reward": 1.2527711153030396, "reward_std": 0.1646108791232109, "rewards/accuracy_reward": 0.6983506917953491, "rewards/brier_reward": 0.8310499072074891, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9761284708976745, "rewards/mean_confidence_reward": 0.6590017795562744, "sampling/batch_mean_priority_error": 0.03579175608843308, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.5833333333333334, "sampling/error_ema_max": 0.12075332552194595, "sampling/error_ema_mean": 0.006667227391153574, "sampling/priority_kl": 0.02999993450939655, "sampling/priority_scale": 0.7503358900779858, "sampling/prob_entropy": 10.278953552246094, "sampling/prob_max": 4.205153236398473e-05, "sampling/prob_min": 1.8355593783780933e-05, "sampling/prompt_draws_max": 4.0, "sampling/prompt_draws_mean": 0.6791999936103821, "sampling/prompt_draws_total": 20376.0, "sampling/seen_fraction": 0.5441400051116944, "sampling/unseen_fraction": 0.45585999488830564, "signal/accuracy_reward/centered_abs_mean": 0.1542697474360466, "signal/accuracy_reward/group_std_mean": 0.20294737815856934, "signal/accuracy_reward/group_zero_std_frac": 0.4277777761220932, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0771348737180233, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0771348737180233, "signal/advantage_abs_mean": 0.11804689913988113, "signal/advantage_pre_scale_abs_mean": 0.11804689913988113, "signal/advantage_pre_scale_std": 0.2127869665622711, "signal/advantage_std": 0.2127869665622711, "signal/brier_reward/centered_abs_mean": 0.09634966850280761, "signal/brier_reward/group_std_mean": 0.12952123284339906, "signal/brier_reward/group_zero_std_frac": 0.05277777910232544, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04817483425140381, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.04817483425140381, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.0365831159055233, "signal/format_reward/group_std_mean": 0.061961127817630766, "signal/format_reward/group_zero_std_frac": 0.7694444477558136, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01829155795276165, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01829155795276165, "signal/mean_confidence_reward/centered_abs_mean": 0.08290661871433258, "signal/mean_confidence_reward/group_std_mean": 0.10661832988262177, "signal/mean_confidence_reward/group_zero_std_frac": 0.06388889029622077, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 8.290661867249582e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 8.290661867249582e-07, "step": 285 }, { "calibration/aurc": 0.28724837900990297, "calibration/batch_distribution_entropy": 0.5682252479718652, "calibration/batch_entropy_100bins": 0.3655933130909036, "calibration/batch_entropy_10bins": 0.5682252479718652, "calibration/batch_entropy_50bins": 0.4205142224408179, "calibration/batch_uniqueness": 0.2985340044223322, "calibration/confidence_entropy": 0.4648818296102114, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.4457751396648044, "calibration/coverage@15%": 0.5015770484171321, "calibration/coverage@20%": 0.6608335870759612, "calibration/coverage@25%": 0.7395583237306184, "calibration/coverage@30%": 0.7770234986945169, "calibration/coverage@5%": 0.13125, "calibration/distribution_entropy_10": 0.5682252479718652, "calibration/distribution_entropy_100": 0.3655933130909036, "calibration/ece": 0.1118047358055387, "calibration/mean_confidence": 0.6590319810025146, "calibration/unique_confidence_per_question": 0.051041666666666666, "calibration/unique_confidences": 19.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.04357638888888891, "completions/max_length": 3553.8, "completions/max_terminated_length": 3553.8, "completions/mean_length": 739.6080718994141, "completions/mean_terminated_length": 754.0138488769531, "completions/min_length": 0.0, "completions/min_terminated_length": 159.4, "epoch": 0.6971153846153846, "grad_norm": 0.0005248843808658421, "learning_rate": 4.507211538461539e-06, "loss": -0.036, "num_tokens": 637696888.0, "reward": 1.13420969247818, "reward_std": 0.1965183973312378, "rewards/accuracy_reward": 0.5455729156732559, "rewards/brier_reward": 0.7833370208740235, "rewards/confidence_one_or_zero": 0.01180555522441864, "rewards/format_reward": 0.9394965291023254, "rewards/mean_confidence_reward": 0.6383805871009827, "sampling/batch_mean_priority_error": 0.06071788752333244, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.5555555555555556, "sampling/error_ema_max": 0.12075332552194595, "sampling/error_ema_mean": 0.006719445157796144, "sampling/priority_kl": 0.029999922960996628, "sampling/priority_scale": 0.7473562896018848, "sampling/prob_entropy": 10.278954124450683, "sampling/prob_max": 4.217153327772394e-05, "sampling/prob_min": 1.759280130499974e-05, "sampling/prompt_draws_max": 5.0, "sampling/prompt_draws_mean": 0.6911999940872192, "sampling/prompt_draws_total": 20736.0, "sampling/seen_fraction": 0.5507933259010315, "sampling/unseen_fraction": 0.44920667409896853, "signal/accuracy_reward/centered_abs_mean": 0.10755750983953476, "signal/accuracy_reward/group_std_mean": 0.15215527266263962, "signal/accuracy_reward/group_zero_std_frac": 0.5250000119209289, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05377875491976738, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05377875491976738, "signal/advantage_abs_mean": 0.14942664355039598, "signal/advantage_pre_scale_abs_mean": 0.14942664355039598, "signal/advantage_pre_scale_std": 0.23570600152015686, "signal/advantage_std": 0.23570600152015686, "signal/brier_reward/centered_abs_mean": 0.1448768064379692, "signal/brier_reward/group_std_mean": 0.18426230549812317, "signal/brier_reward/group_zero_std_frac": 0.19444445222616197, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0724384032189846, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.0724384032189846, "signal/confidence_one_or_zero/centered_abs_mean": 0.02027994841337204, "signal/confidence_one_or_zero/group_std_mean": 0.03520326614379883, "signal/confidence_one_or_zero/group_zero_std_frac": 0.8694444477558136, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 2.027994696618407e-07, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 2.027994696618407e-07, "signal/format_reward/centered_abs_mean": 0.0818847662769258, "signal/format_reward/group_std_mean": 0.10462786592543125, "signal/format_reward/group_zero_std_frac": 0.7166666746139526, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0409423831384629, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0409423831384629, "signal/mean_confidence_reward/centered_abs_mean": 0.09347325339913368, "signal/mean_confidence_reward/group_std_mean": 0.12237994074821472, "signal/mean_confidence_reward/group_zero_std_frac": 0.23611111491918563, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 9.347324635200494e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 9.347324635200494e-07, "step": 290 }, { "calibration/aurc": 0.15274101278407712, "calibration/batch_distribution_entropy": 0.3099765802059592, "calibration/batch_entropy_100bins": 0.17101656435086784, "calibration/batch_entropy_10bins": 0.3099765802059592, "calibration/batch_entropy_50bins": 0.2013179325290131, "calibration/batch_uniqueness": -0.3301233201476169, "calibration/confidence_entropy": 0.38117362988310044, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.37017620603392887, "calibration/coverage@15%": 0.5733911431064572, "calibration/coverage@20%": 0.6, "calibration/coverage@25%": 0.759375, "calibration/coverage@30%": 0.9276041666666666, "calibration/coverage@5%": 0.3330622120760598, "calibration/distribution_entropy_10": 0.3099765802059592, "calibration/distribution_entropy_100": 0.17101656435086784, "calibration/ece": 0.10969831779512347, "calibration/mean_confidence": 0.8102238274005625, "calibration/unique_confidence_per_question": 0.019791666666666666, "calibration/unique_confidences": 7.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0015625, "completions/max_length": 3030.8, "completions/max_terminated_length": 3030.8, "completions/mean_length": 902.4171875, "completions/mean_terminated_length": 903.819921875, "completions/min_length": 0.0, "completions/min_terminated_length": 208.6, "epoch": 0.7091346153846154, "grad_norm": 0.0005306955426931381, "learning_rate": 4.477163461538462e-06, "loss": 0.0003, "num_tokens": 651179902.0, "reward": 1.2625298500061035, "reward_std": 0.13462880551815032, "rewards/accuracy_reward": 0.69296875, "rewards/brier_reward": 0.833638048171997, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9984375, "rewards/mean_confidence_reward": 0.7773871302604676, "sampling/batch_mean_priority_error": 0.07865727422187206, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.5194444444444445, "sampling/error_ema_max": 0.12075332552194595, "sampling/error_ema_mean": 0.006801041215658188, "sampling/priority_kl": 0.030000052228569985, "sampling/priority_scale": 0.7444859921699389, "sampling/prob_entropy": 10.278954315185548, "sampling/prob_max": 4.228979887557216e-05, "sampling/prob_min": 1.7701475371723063e-05, "sampling/prompt_draws_max": 5.0, "sampling/prompt_draws_mean": 0.7032000064849854, "sampling/prompt_draws_total": 21096.0, "sampling/seen_fraction": 0.557260000705719, "sampling/unseen_fraction": 0.442739999294281, "signal/accuracy_reward/centered_abs_mean": 0.12332356721162796, "signal/accuracy_reward/group_std_mean": 0.16784208714962007, "signal/accuracy_reward/group_zero_std_frac": 0.5027777910232544, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06166178360581398, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06166178360581398, "signal/advantage_abs_mean": 0.0977450504899025, "signal/advantage_pre_scale_abs_mean": 0.0977450504899025, "signal/advantage_pre_scale_std": 0.18939689993858339, "signal/advantage_std": 0.18939689993858339, "signal/brier_reward/centered_abs_mean": 0.09247624278068542, "signal/brier_reward/group_std_mean": 0.12620881497859954, "signal/brier_reward/group_zero_std_frac": 0.32222222089767455, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04623812139034271, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.04623812139034271, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.002951388875953853, "signal/format_reward/group_std_mean": 0.006992555782198906, "signal/format_reward/group_zero_std_frac": 0.9666666626930237, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0014756944379769265, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0014756944379769265, "signal/mean_confidence_reward/centered_abs_mean": 0.0591384619474411, "signal/mean_confidence_reward/group_std_mean": 0.07959082275629044, "signal/mean_confidence_reward/group_zero_std_frac": 0.41666666865348817, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.913845825489261e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.913845825489261e-07, "step": 295 }, { "calibration/aurc": 0.17787271048902797, "calibration/batch_distribution_entropy": 0.5103355610682992, "calibration/batch_entropy_100bins": 0.2808516359903569, "calibration/batch_entropy_10bins": 0.5103355610682992, "calibration/batch_entropy_50bins": 0.33061400174644945, "calibration/batch_uniqueness": 0.17855177361487, "calibration/confidence_entropy": 0.4409625069876807, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.4376278285465623, "calibration/coverage@15%": 0.4856396866840731, "calibration/coverage@20%": 0.5389033942558747, "calibration/coverage@25%": 0.7477806788511749, "calibration/coverage@30%": 0.9177083333333332, "calibration/coverage@5%": 0.10809399477806789, "calibration/distribution_entropy_10": 0.5103355610682992, "calibration/distribution_entropy_100": 0.2808516359903569, "calibration/ece": 0.1495082680591819, "calibration/mean_confidence": 0.7491250543951262, "calibration/unique_confidence_per_question": 0.019791666666666666, "calibration/unique_confidences": 7.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.001388888888888884, "completions/max_length": 3413.8, "completions/max_terminated_length": 3413.8, "completions/mean_length": 868.06884765625, "completions/mean_terminated_length": 869.28193359375, "completions/min_length": 0.0, "completions/min_terminated_length": 211.6, "epoch": 0.7211538461538461, "grad_norm": 0.0003403428418096155, "learning_rate": 4.447115384615385e-06, "loss": 0.0, "num_tokens": 664253655.0, "reward": 1.2810194969177247, "reward_std": 0.12045833468437195, "rewards/accuracy_reward": 0.7264756917953491, "rewards/brier_reward": 0.8370242953300476, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.998524296283722, "rewards/mean_confidence_reward": 0.7348958492279053, "sampling/batch_mean_priority_error": 0.06383506851105139, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.5805555555555555, "sampling/error_ema_max": 0.14215169847011566, "sampling/error_ema_mean": 0.006883678119629621, "sampling/priority_kl": 0.029999998211860657, "sampling/priority_scale": 0.7418554007774218, "sampling/prob_entropy": 10.278953361511231, "sampling/prob_max": 4.2413204209879043e-05, "sampling/prob_min": 1.7807840413297528e-05, "sampling/prompt_draws_max": 5.0, "sampling/prompt_draws_mean": 0.7152000069618225, "sampling/prompt_draws_total": 21456.0, "sampling/seen_fraction": 0.5638733267784118, "sampling/unseen_fraction": 0.43612667322158816, "signal/accuracy_reward/centered_abs_mean": 0.12248806357383728, "signal/accuracy_reward/group_std_mean": 0.16501044929027558, "signal/accuracy_reward/group_zero_std_frac": 0.5138888955116272, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06124403178691864, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06124403178691864, "signal/advantage_abs_mean": 0.08717213496565819, "signal/advantage_pre_scale_abs_mean": 0.08717213496565819, "signal/advantage_pre_scale_std": 0.16690560579299926, "signal/advantage_std": 0.16690560579299926, "signal/brier_reward/centered_abs_mean": 0.08108467981219292, "signal/brier_reward/group_std_mean": 0.11095312237739563, "signal/brier_reward/group_zero_std_frac": 0.2527777820825577, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04054233990609646, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.04054233990609646, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.0028266058885492384, "signal/format_reward/group_std_mean": 0.007450965978205204, "signal/format_reward/group_zero_std_frac": 0.9611111164093018, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0014133029442746192, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0014133029442746192, "signal/mean_confidence_reward/centered_abs_mean": 0.06620009019970893, "signal/mean_confidence_reward/group_std_mean": 0.08651411533355713, "signal/mean_confidence_reward/group_zero_std_frac": 0.3138888955116272, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.620008434765623e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.620008434765623e-07, "step": 300 }, { "epoch": 0.7211538461538461, "eval_calibration/aurc": 0.14878378298575334, "eval_calibration/batch_distribution_entropy": 0.6329108288230492, "eval_calibration/batch_entropy_100bins": 0.3565150525929009, "eval_calibration/batch_entropy_10bins": 0.6329108288230492, "eval_calibration/batch_entropy_50bins": 0.4196837515471404, "eval_calibration/batch_uniqueness": 0.44409782503858014, "eval_calibration/confidence_entropy": 0.46713917160786056, "eval_calibration/coverage@0%": 0.0, "eval_calibration/coverage@1%": 0.0, "eval_calibration/coverage@10%": 0.0, "eval_calibration/coverage@15%": 0.5894097222222222, "eval_calibration/coverage@20%": 0.7630208333333334, "eval_calibration/coverage@25%": 0.9088541666666666, "eval_calibration/coverage@30%": 1.0, "eval_calibration/coverage@5%": 0.0, "eval_calibration/distribution_entropy_10": 0.6329108288230492, "eval_calibration/distribution_entropy_100": 0.3565150525929009, "eval_calibration/ece": 0.031770833333333456, "eval_calibration/mean_confidence": 0.6878472222222223, "eval_calibration/unique_confidence_per_question": 0.0078125, "eval_calibration/unique_confidences": 9, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 2147.0, "eval_completions/max_terminated_length": 2147.0, "eval_completions/mean_length": 819.9404703776041, "eval_completions/mean_terminated_length": 819.9404703776041, "eval_completions/min_length": 268.6666666666667, "eval_completions/min_terminated_length": 268.6666666666667, "eval_loss": 0.0, "eval_num_tokens": 664253655.0, "eval_reward": 1.278730869293213, "eval_reward_std": 0.2882691373427709, "eval_rewards/accuracy_reward": 0.7100694378217062, "eval_rewards/brier_reward": 0.8473784724871317, "eval_rewards/confidence_one_or_zero": 0.0, "eval_rewards/format_reward": 1.0, "eval_rewards/mean_confidence_reward": 0.6878472069899241, "eval_runtime": 124.8589, "eval_samples_per_second": 8.009, "eval_signal/accuracy_reward/centered_abs_mean": 0.3951822916666667, "eval_signal/accuracy_reward/group_std_mean": 0.4499455789724986, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.19759114583333334, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.19759114583333334, "eval_signal/advantage_abs_mean": 0.245485690732797, "eval_signal/advantage_pre_scale_abs_mean": 0.245485690732797, "eval_signal/advantage_pre_scale_std": 0.28670944770177204, "eval_signal/advantage_std": 0.28670944770177204, "eval_signal/brier_reward/centered_abs_mean": 0.16264214863379797, "eval_signal/brier_reward/group_std_mean": 0.2163154954711596, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.08132107431689899, "eval_signal/brier_reward/weight": 0.5, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.08132107431689899, "eval_signal/confidence_one_or_zero/centered_abs_mean": 0.0, "eval_signal/confidence_one_or_zero/group_std_mean": 0.0, "eval_signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "eval_signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "eval_signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/mean_confidence_reward/centered_abs_mean": 0.21807722747325897, "eval_signal/mean_confidence_reward/group_std_mean": 0.2531433776021004, "eval_signal/mean_confidence_reward/group_zero_std_frac": 0.0, "eval_signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 2.180772374534475e-06, "eval_signal/mean_confidence_reward/weight": 9.999999747378752e-06, "eval_signal/mean_confidence_reward/weighted_centered_abs_mean": 2.180772374534475e-06, "eval_steps_per_second": 0.048, "step": 300 }, { "epoch": 0.7211538461538461, "step": 300, "train_probe_calibration/aurc": 0.11106416293807442, "train_probe_calibration/batch_distribution_entropy": 0.6166980952291539, "train_probe_calibration/batch_entropy_100bins": 0.346543914125164, "train_probe_calibration/batch_entropy_10bins": 0.6166980952291539, "train_probe_calibration/batch_entropy_50bins": 0.40794588867458925, "train_probe_calibration/batch_uniqueness": 0.41111171392746915, "train_probe_calibration/confidence_entropy": 0.4680949122400047, "train_probe_calibration/coverage@0%": 0.0, "train_probe_calibration/coverage@1%": 0.0, "train_probe_calibration/coverage@10%": 0.5954861111111112, "train_probe_calibration/coverage@15%": 0.7708333333333334, "train_probe_calibration/coverage@20%": 0.8585069444444444, "train_probe_calibration/coverage@25%": 0.9340277777777778, "train_probe_calibration/coverage@30%": 1.0, "train_probe_calibration/coverage@5%": 0.0, "train_probe_calibration/distribution_entropy_10": 0.6166980952291539, "train_probe_calibration/distribution_entropy_100": 0.346543914125164, "train_probe_calibration/ece": 0.04765624999999997, "train_probe_calibration/mean_confidence": 0.6978298611111112, "train_probe_calibration/unique_confidence_per_question": 0.0078125, "train_probe_calibration/unique_confidences": 9, "train_probe_completions/clipped_ratio": 0.0, "train_probe_completions/max_length": 2437.5, "train_probe_completions/max_terminated_length": 2437.5, "train_probe_completions/mean_length": 839.3019307454427, "train_probe_completions/mean_terminated_length": 839.3019307454427, "train_probe_completions/min_length": 197.66666666666666, "train_probe_completions/min_terminated_length": 197.66666666666666, "train_probe_loss": 0.0, "train_probe_num_tokens": 664253655.0, "train_probe_reward": 1.3032057682673137, "train_probe_reward_std": 0.2715952495733897, "train_probe_rewards/accuracy_reward": 0.7447916666666666, "train_probe_rewards/brier_reward": 0.8616058925787607, "train_probe_rewards/confidence_one_or_zero": 0.0, "train_probe_rewards/format_reward": 1.0, "train_probe_rewards/mean_confidence_reward": 0.6978298425674438, "train_probe_runtime": 143.155, "train_probe_samples_per_second": 6.985, "train_probe_signal/accuracy_reward/centered_abs_mean": 0.3674045105775197, "train_probe_signal/accuracy_reward/group_std_mean": 0.4328818420569102, "train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0, "train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.18370225528875986, "train_probe_signal/accuracy_reward/weight": 0.5, "train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.18370225528875986, "train_probe_signal/advantage_abs_mean": 0.22503923873106638, "train_probe_signal/advantage_pre_scale_abs_mean": 0.22503923873106638, "train_probe_signal/advantage_pre_scale_std": 0.2698550472656886, "train_probe_signal/advantage_std": 0.2698550472656886, "train_probe_signal/brier_reward/centered_abs_mean": 0.14530708640813828, "train_probe_signal/brier_reward/group_std_mean": 0.1937358702222506, "train_probe_signal/brier_reward/group_zero_std_frac": 0.0, "train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.07265354320406914, "train_probe_signal/brier_reward/weight": 0.5, "train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.07265354320406914, "train_probe_signal/confidence_one_or_zero/centered_abs_mean": 0.0, "train_probe_signal/confidence_one_or_zero/group_std_mean": 0.0, "train_probe_signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "train_probe_signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "train_probe_signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "train_probe_signal/format_reward/centered_abs_mean": 0.0, "train_probe_signal/format_reward/group_std_mean": 0.0, "train_probe_signal/format_reward/group_zero_std_frac": 1.0, "train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/format_reward/weight": 0.5, "train_probe_signal/format_reward/weighted_centered_abs_mean": 0.0, "train_probe_signal/mean_confidence_reward/centered_abs_mean": 0.21216904123624167, "train_probe_signal/mean_confidence_reward/group_std_mean": 0.24356854458649954, "train_probe_signal/mean_confidence_reward/group_zero_std_frac": 0.0, "train_probe_signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 2.1216902723608655e-06, "train_probe_signal/mean_confidence_reward/weight": 9.999999747378752e-06, "train_probe_signal/mean_confidence_reward/weighted_centered_abs_mean": 2.1216902723608655e-06, "train_probe_steps_per_second": 0.042 }, { "calibration/aurc": 0.13489746861381913, "calibration/batch_distribution_entropy": 0.5663912873579798, "calibration/batch_entropy_100bins": 0.31968976387481074, "calibration/batch_entropy_10bins": 0.5663912873579798, "calibration/batch_entropy_50bins": 0.37633361749638417, "calibration/batch_uniqueness": 0.2982378782233609, "calibration/confidence_entropy": 0.4514197585485686, "calibration/coverage@0%": 0.1, "calibration/coverage@1%": 0.11197916666666667, "calibration/coverage@10%": 0.49733327893820717, "calibration/coverage@15%": 0.6512483681462141, "calibration/coverage@20%": 0.70863386640557, "calibration/coverage@25%": 0.8930292645778938, "calibration/coverage@30%": 0.9065709312445606, "calibration/coverage@5%": 0.3082381418624891, "calibration/distribution_entropy_10": 0.5663912873579798, "calibration/distribution_entropy_100": 0.31968976387481074, "calibration/ece": 0.11733763054830285, "calibration/mean_confidence": 0.7128647193211489, "calibration/unique_confidence_per_question": 0.022916666666666665, "calibration/unique_confidences": 8.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0007812500000000222, "completions/max_length": 3041.8, "completions/max_terminated_length": 3041.8, "completions/mean_length": 869.4628540039063, "completions/mean_terminated_length": 870.12236328125, "completions/min_length": 0.0, "completions/min_terminated_length": 180.4, "epoch": 0.7331730769230769, "grad_norm": 0.0006279823137447238, "learning_rate": 4.4170673076923085e-06, "loss": 0.0003, "num_tokens": 677379851.0, "reward": 1.2907153844833374, "reward_std": 0.11878290176391601, "rewards/accuracy_reward": 0.7276041626930236, "rewards/brier_reward": 0.8545937538146973, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.999218738079071, "rewards/mean_confidence_reward": 0.7068228960037232, "sampling/batch_mean_priority_error": 0.045256717291975654, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.5555555555555556, "sampling/error_ema_max": 0.14215169847011566, "sampling/error_ema_mean": 0.006935930624604225, "sampling/priority_kl": 0.03000014051795006, "sampling/priority_scale": 0.7394883930915966, "sampling/prob_entropy": 10.278952980041504, "sampling/prob_max": 4.2540891445241866e-05, "sampling/prob_min": 1.7829657736001536e-05, "sampling/prompt_draws_max": 5.0, "sampling/prompt_draws_mean": 0.7271999955177307, "sampling/prompt_draws_total": 21816.0, "sampling/seen_fraction": 0.5706066608428955, "sampling/unseen_fraction": 0.4293933391571045, "signal/accuracy_reward/centered_abs_mean": 0.133094622194767, "signal/accuracy_reward/group_std_mean": 0.17181747853755952, "signal/accuracy_reward/group_zero_std_frac": 0.5250000119209289, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0665473110973835, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0665473110973835, "signal/advantage_abs_mean": 0.09010841697454453, "signal/advantage_pre_scale_abs_mean": 0.09010841697454453, "signal/advantage_pre_scale_std": 0.16782027184963227, "signal/advantage_std": 0.16782027184963227, "signal/brier_reward/centered_abs_mean": 0.07871444821357727, "signal/brier_reward/group_std_mean": 0.10398375689983368, "signal/brier_reward/group_zero_std_frac": 0.18333333134651184, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03935722410678864, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03935722410678864, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.0014702690648846328, "signal/format_reward/group_std_mean": 0.003469961090013385, "signal/format_reward/group_zero_std_frac": 0.9833333253860473, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0007351345324423164, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0007351345324423164, "signal/mean_confidence_reward/centered_abs_mean": 0.06634602397680282, "signal/mean_confidence_reward/group_std_mean": 0.08630629777908325, "signal/mean_confidence_reward/group_zero_std_frac": 0.21388888955116273, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.634602186750271e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.634602186750271e-07, "step": 305 }, { "calibration/aurc": 0.1590332219273491, "calibration/batch_distribution_entropy": 0.5941489175456391, "calibration/batch_entropy_100bins": 0.32546430178528685, "calibration/batch_entropy_10bins": 0.5941489175456391, "calibration/batch_entropy_50bins": 0.38313131009335605, "calibration/batch_uniqueness": 0.3948182590011388, "calibration/confidence_entropy": 0.4614033471335378, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.35505738685813754, "calibration/coverage@15%": 0.4348020017406441, "calibration/coverage@20%": 0.6619329308093995, "calibration/coverage@25%": 0.8572644691035685, "calibration/coverage@30%": 0.9447916666666666, "calibration/coverage@5%": 0.14203655352480418, "calibration/distribution_entropy_10": 0.5941489175456391, "calibration/distribution_entropy_100": 0.32546430178528685, "calibration/ece": 0.10625652741514358, "calibration/mean_confidence": 0.7430390557006092, "calibration/unique_confidence_per_question": 0.020833333333333332, "calibration/unique_confidences": 8.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0009548611111111161, "completions/max_length": 3017.6, "completions/max_terminated_length": 3017.6, "completions/mean_length": 802.1393188476562, "completions/mean_terminated_length": 802.9186279296875, "completions/min_length": 0.0, "completions/min_terminated_length": 226.4, "epoch": 0.7451923076923077, "grad_norm": 0.0005608039791695774, "learning_rate": 4.3870192307692315e-06, "loss": -0.0001, "num_tokens": 689739984.0, "reward": 1.2944949626922608, "reward_std": 0.11501410156488419, "rewards/accuracy_reward": 0.731249988079071, "rewards/brier_reward": 0.8586796998977662, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9990451216697693, "rewards/mean_confidence_reward": 0.7596788287162781, "sampling/batch_mean_priority_error": 0.0495483870967742, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.5083333333333333, "sampling/error_ema_max": 0.14215169847011566, "sampling/error_ema_mean": 0.0069914465770125386, "sampling/priority_kl": 0.030000025779008864, "sampling/priority_scale": 0.7370796263450756, "sampling/prob_entropy": 10.278951644897461, "sampling/prob_max": 4.266466785338707e-05, "sampling/prob_min": 1.7870402007247322e-05, "sampling/prompt_draws_max": 5.0, "sampling/prompt_draws_mean": 0.7391999959945679, "sampling/prompt_draws_total": 22176.0, "sampling/seen_fraction": 0.5770333290100098, "sampling/unseen_fraction": 0.42296667098999025, "signal/accuracy_reward/centered_abs_mean": 0.10629340261220932, "signal/accuracy_reward/group_std_mean": 0.15068531036376953, "signal/accuracy_reward/group_zero_std_frac": 0.5277777731418609, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05314670130610466, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05314670130610466, "signal/advantage_abs_mean": 0.08016361147165299, "signal/advantage_pre_scale_abs_mean": 0.08016361147165299, "signal/advantage_pre_scale_std": 0.16271564960479737, "signal/advantage_std": 0.16271564960479737, "signal/brier_reward/centered_abs_mean": 0.07374851629137993, "signal/brier_reward/group_std_mean": 0.10301677286624908, "signal/brier_reward/group_zero_std_frac": 0.12222222536802292, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03687425814568997, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03687425814568997, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.0018283419660292566, "signal/format_reward/group_std_mean": 0.004803628288209438, "signal/format_reward/group_zero_std_frac": 0.975, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0009141709830146283, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0009141709830146283, "signal/mean_confidence_reward/centered_abs_mean": 0.06200139597058296, "signal/mean_confidence_reward/group_std_mean": 0.08194329738616943, "signal/mean_confidence_reward/group_zero_std_frac": 0.14722222685813904, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.200139523571125e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.200139523571125e-07, "step": 310 }, { "calibration/aurc": 0.13479263979375694, "calibration/batch_distribution_entropy": 0.5943594151592482, "calibration/batch_entropy_100bins": 0.320886936344589, "calibration/batch_entropy_10bins": 0.5943594151592482, "calibration/batch_entropy_50bins": 0.37774290955771883, "calibration/batch_uniqueness": 0.3900287770099487, "calibration/confidence_entropy": 0.46822462705007134, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.08177083333333333, "calibration/coverage@10%": 0.516490835492006, "calibration/coverage@15%": 0.6732616688253394, "calibration/coverage@20%": 0.7740828103691673, "calibration/coverage@25%": 0.9017757545931758, "calibration/coverage@30%": 0.9546136811023622, "calibration/coverage@5%": 0.153125, "calibration/distribution_entropy_10": 0.5943594151592482, "calibration/distribution_entropy_100": 0.320886936344589, "calibration/ece": 0.09571776724710983, "calibration/mean_confidence": 0.7316965047490801, "calibration/unique_confidence_per_question": 0.022916666666666665, "calibration/unique_confidences": 8.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0009548611111111383, "completions/max_length": 3089.4, "completions/max_terminated_length": 3089.4, "completions/mean_length": 820.1656372070313, "completions/mean_terminated_length": 820.9537719726562, "completions/min_length": 0.0, "completions/min_terminated_length": 233.6, "epoch": 0.7572115384615384, "grad_norm": 0.000576628081034869, "learning_rate": 4.356971153846154e-06, "loss": 0.0013, "num_tokens": 702283716.0, "reward": 1.3008342266082764, "reward_std": 0.12076780349016189, "rewards/accuracy_reward": 0.7386284828186035, "rewards/brier_reward": 0.8643272757530213, "rewards/confidence_one_or_zero": 0.0007812500174622983, "rewards/format_reward": 0.9986979126930237, "rewards/mean_confidence_reward": 0.7404079675674439, "sampling/batch_mean_priority_error": 0.04232408260335557, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.5194444444444444, "sampling/error_ema_max": 0.14215169847011566, "sampling/error_ema_mean": 0.0070373106747865675, "sampling/priority_kl": 0.029999876394867897, "sampling/priority_scale": 0.7346825778251513, "sampling/prob_entropy": 10.278953742980956, "sampling/prob_max": 4.278513515600935e-05, "sampling/prob_min": 1.7971647685044446e-05, "sampling/prompt_draws_max": 5.0, "sampling/prompt_draws_mean": 0.751199996471405, "sampling/prompt_draws_total": 22536.0, "sampling/seen_fraction": 0.5832133173942566, "sampling/unseen_fraction": 0.4167866826057434, "signal/accuracy_reward/centered_abs_mean": 0.12458224892616272, "signal/accuracy_reward/group_std_mean": 0.1669006824493408, "signal/accuracy_reward/group_zero_std_frac": 0.5138888895511627, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06229112446308136, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06229112446308136, "signal/advantage_abs_mean": 0.08788303434848785, "signal/advantage_pre_scale_abs_mean": 0.08788303434848785, "signal/advantage_pre_scale_std": 0.17004837095737457, "signal/advantage_std": 0.17004837095737457, "signal/brier_reward/centered_abs_mean": 0.07569783031940461, "signal/brier_reward/group_std_mean": 0.10299409478902817, "signal/brier_reward/group_zero_std_frac": 0.08611111342906952, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.037848915159702304, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.037848915159702304, "signal/confidence_one_or_zero/centered_abs_mean": 0.0014485677354969084, "signal/confidence_one_or_zero/group_std_mean": 0.0031183866783976557, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9861111044883728, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.4485675592368353e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.4485675592368353e-08, "signal/format_reward/centered_abs_mean": 0.0024793836753815414, "signal/format_reward/group_std_mean": 0.006416239216923714, "signal/format_reward/group_zero_std_frac": 0.9666666507720947, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0012396918376907707, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0012396918376907707, "signal/mean_confidence_reward/centered_abs_mean": 0.06360838785767556, "signal/mean_confidence_reward/group_std_mean": 0.08352760523557663, "signal/mean_confidence_reward/group_zero_std_frac": 0.11388889104127883, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.360838597174734e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.360838597174734e-07, "step": 315 }, { "calibration/aurc": 0.13073502348042956, "calibration/batch_distribution_entropy": 0.5765002360538408, "calibration/batch_entropy_100bins": 0.31331468009648444, "calibration/batch_entropy_10bins": 0.5765002360538408, "calibration/batch_entropy_50bins": 0.3688289720205298, "calibration/batch_uniqueness": 0.3522939916000804, "calibration/confidence_entropy": 0.43345649050027235, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.5114032689477306, "calibration/coverage@15%": 0.7500894030889598, "calibration/coverage@20%": 0.7834609126875635, "calibration/coverage@25%": 0.8748218559617058, "calibration/coverage@30%": 0.9118513381201044, "calibration/coverage@5%": 0.16605744125326372, "calibration/distribution_entropy_10": 0.5765002360538408, "calibration/distribution_entropy_100": 0.31331468009648444, "calibration/ece": 0.0996640623932032, "calibration/mean_confidence": 0.7197991089736581, "calibration/unique_confidence_per_question": 0.02239583333333333, "calibration/unique_confidences": 8.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0021701388888888838, "completions/max_length": 3174.0, "completions/max_terminated_length": 3174.0, "completions/mean_length": 847.2627563476562, "completions/mean_terminated_length": 849.2025634765625, "completions/min_length": 36.4, "completions/min_terminated_length": 219.8, "epoch": 0.7692307692307693, "grad_norm": 0.0004925581743009388, "learning_rate": 4.326923076923077e-06, "loss": -0.0007, "num_tokens": 715118423.0, "reward": 1.2951796770095825, "reward_std": 0.11276318728923798, "rewards/accuracy_reward": 0.7394965291023254, "rewards/brier_reward": 0.8530182361602783, "rewards/confidence_one_or_zero": 0.00086805559694767, "rewards/format_reward": 0.9978298544883728, "rewards/mean_confidence_reward": 0.7319531440734863, "sampling/batch_mean_priority_error": 0.05047228719001652, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.49444444444444446, "sampling/error_ema_max": 0.14215169847011566, "sampling/error_ema_mean": 0.007085711508989334, "sampling/priority_kl": 0.02999986745417118, "sampling/priority_scale": 0.7324336468940601, "sampling/prob_entropy": 10.278952980041504, "sampling/prob_max": 4.290657088859007e-05, "sampling/prob_min": 1.807057451514993e-05, "sampling/prompt_draws_max": 5.0, "sampling/prompt_draws_mean": 0.7632000088691712, "sampling/prompt_draws_total": 22896.0, "sampling/seen_fraction": 0.5893399953842163, "sampling/unseen_fraction": 0.41066000461578367, "signal/accuracy_reward/centered_abs_mean": 0.11623806059360504, "signal/accuracy_reward/group_std_mean": 0.1584540769457817, "signal/accuracy_reward/group_zero_std_frac": 0.5249999940395356, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05811903029680252, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05811903029680252, "signal/advantage_abs_mean": 0.08010389134287835, "signal/advantage_pre_scale_abs_mean": 0.08010389134287835, "signal/advantage_pre_scale_std": 0.1603192687034607, "signal/advantage_std": 0.1603192687034607, "signal/brier_reward/centered_abs_mean": 0.07845103666186333, "signal/brier_reward/group_std_mean": 0.10729333162307739, "signal/brier_reward/group_zero_std_frac": 0.10277777835726738, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.039225518330931665, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.039225518330931665, "signal/confidence_one_or_zero/centered_abs_mean": 0.0011935763992369174, "signal/confidence_one_or_zero/group_std_mean": 0.0013081363402307033, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9972222208976745, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.1935763666315324e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.1935763666315324e-08, "signal/format_reward/centered_abs_mean": 0.0036295573459938168, "signal/format_reward/group_std_mean": 0.007045127963647246, "signal/format_reward/group_zero_std_frac": 0.9694444417953492, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0018147786729969084, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0018147786729969084, "signal/mean_confidence_reward/centered_abs_mean": 0.068427724391222, "signal/mean_confidence_reward/group_std_mean": 0.09000243991613388, "signal/mean_confidence_reward/group_zero_std_frac": 0.11111111417412758, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.842772108939243e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.842772108939243e-07, "step": 320 }, { "calibration/aurc": 0.13901186236414578, "calibration/batch_distribution_entropy": 0.49057468410709976, "calibration/batch_entropy_100bins": 0.27327159981394644, "calibration/batch_entropy_10bins": 0.49057468410709976, "calibration/batch_entropy_50bins": 0.32169090580353693, "calibration/batch_uniqueness": 0.12938756523973152, "calibration/confidence_entropy": 0.4157152736968742, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.4143706369982548, "calibration/coverage@15%": 0.6253739664926022, "calibration/coverage@20%": 0.8666517080069627, "calibration/coverage@25%": 0.9114583333333334, "calibration/coverage@30%": 0.971875, "calibration/coverage@5%": 0.07291666666666666, "calibration/distribution_entropy_10": 0.49057468410709976, "calibration/distribution_entropy_100": 0.27327159981394644, "calibration/ece": 0.10245163746417334, "calibration/mean_confidence": 0.753003616991101, "calibration/unique_confidence_per_question": 0.02239583333333333, "calibration/unique_confidences": 8.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0012152777777777902, "completions/max_length": 3229.4, "completions/max_terminated_length": 3229.4, "completions/mean_length": 874.606689453125, "completions/mean_terminated_length": 875.6975708007812, "completions/min_length": 43.0, "completions/min_terminated_length": 218.4, "epoch": 0.78125, "grad_norm": 0.0005906697479076684, "learning_rate": 4.296875e-06, "loss": 0.0005, "num_tokens": 728261700.0, "reward": 1.2714888095855712, "reward_std": 0.13772770762443542, "rewards/accuracy_reward": 0.709375, "rewards/brier_reward": 0.8348029732704163, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9987847208976746, "rewards/mean_confidence_reward": 0.7502343893051148, "sampling/batch_mean_priority_error": 0.0678190865203879, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.5083333333333333, "sampling/error_ema_max": 0.14215169847011566, "sampling/error_ema_mean": 0.007151188887655735, "sampling/priority_kl": 0.030000296607613563, "sampling/priority_scale": 0.7303502858849242, "sampling/prob_entropy": 10.278953742980956, "sampling/prob_max": 4.302758679841645e-05, "sampling/prob_min": 1.81500847247662e-05, "sampling/prompt_draws_max": 5.0, "sampling/prompt_draws_mean": 0.7751999974250794, "sampling/prompt_draws_total": 23256.0, "sampling/seen_fraction": 0.5953666567802429, "sampling/unseen_fraction": 0.4046333432197571, "signal/accuracy_reward/centered_abs_mean": 0.1295681416988373, "signal/accuracy_reward/group_std_mean": 0.18199051022529603, "signal/accuracy_reward/group_zero_std_frac": 0.4388888955116272, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06478407084941865, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06478407084941865, "signal/advantage_abs_mean": 0.09816538095474243, "signal/advantage_pre_scale_abs_mean": 0.09816538095474243, "signal/advantage_pre_scale_std": 0.18509195148944854, "signal/advantage_std": 0.18509195148944854, "signal/brier_reward/centered_abs_mean": 0.09397627711296082, "signal/brier_reward/group_std_mean": 0.12922771871089936, "signal/brier_reward/group_zero_std_frac": 0.1583333358168602, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04698813855648041, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.04698813855648041, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.002289496478624642, "signal/format_reward/group_std_mean": 0.005327311297878623, "signal/format_reward/group_zero_std_frac": 0.975, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.001144748239312321, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.001144748239312321, "signal/mean_confidence_reward/centered_abs_mean": 0.06724716573953629, "signal/mean_confidence_reward/group_std_mean": 0.09011977612972259, "signal/mean_confidence_reward/group_zero_std_frac": 0.19722222536802292, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.724716740791337e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.724716740791337e-07, "step": 325 }, { "calibration/aurc": 0.15108210556594276, "calibration/batch_distribution_entropy": 0.4955656129923048, "calibration/batch_entropy_100bins": 0.27147566325949657, "calibration/batch_entropy_10bins": 0.4955656129923048, "calibration/batch_entropy_50bins": 0.3195767583496484, "calibration/batch_uniqueness": 0.0848271456646099, "calibration/confidence_entropy": 0.40205932981181736, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.29648954930078053, "calibration/coverage@15%": 0.6091366724536245, "calibration/coverage@20%": 0.7798693748034939, "calibration/coverage@25%": 0.8649695387293299, "calibration/coverage@30%": 0.9140244234116622, "calibration/coverage@5%": 0.09973890339425587, "calibration/distribution_entropy_10": 0.4955656129923048, "calibration/distribution_entropy_100": 0.27147566325949657, "calibration/ece": 0.10098273481607048, "calibration/mean_confidence": 0.7487735536592258, "calibration/unique_confidence_per_question": 0.022916666666666665, "calibration/unique_confidences": 8.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0004340277777777901, "completions/max_length": 3020.2, "completions/max_terminated_length": 3020.2, "completions/mean_length": 851.8645874023438, "completions/mean_terminated_length": 852.2350708007813, "completions/min_length": 49.0, "completions/min_terminated_length": 218.0, "epoch": 0.7932692307692307, "grad_norm": 0.0005713775753974915, "learning_rate": 4.266826923076923e-06, "loss": 0.0, "num_tokens": 741202636.0, "reward": 1.263979959487915, "reward_std": 0.12538683861494065, "rewards/accuracy_reward": 0.6946180701255799, "rewards/brier_reward": 0.8337604522705078, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9995659589767456, "rewards/mean_confidence_reward": 0.7758159756660461, "sampling/batch_mean_priority_error": 0.07355097726254482, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.49444444444444435, "sampling/error_ema_max": 0.14215169847011566, "sampling/error_ema_mean": 0.007231994159519672, "sampling/priority_kl": 0.0299999613314867, "sampling/priority_scale": 0.7283593594795092, "sampling/prob_entropy": 10.27895221710205, "sampling/prob_max": 4.314823090680875e-05, "sampling/prob_min": 1.8233119772048666e-05, "sampling/prompt_draws_max": 5.0, "sampling/prompt_draws_mean": 0.7871999979019165, "sampling/prompt_draws_total": 23616.0, "sampling/seen_fraction": 0.6012866616249084, "sampling/unseen_fraction": 0.39871333837509154, "signal/accuracy_reward/centered_abs_mean": 0.11459418386220932, "signal/accuracy_reward/group_std_mean": 0.15884294211864472, "signal/accuracy_reward/group_zero_std_frac": 0.522222226858139, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05729709193110466, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05729709193110466, "signal/advantage_abs_mean": 0.09014993458986283, "signal/advantage_pre_scale_abs_mean": 0.09014993458986283, "signal/advantage_pre_scale_std": 0.17779311537742615, "signal/advantage_std": 0.17779311537742615, "signal/brier_reward/centered_abs_mean": 0.08623187392950057, "signal/brier_reward/group_std_mean": 0.11791259646415711, "signal/brier_reward/group_zero_std_frac": 0.24722222089767457, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04311593696475029, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.04311593696475029, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.0008409288129769266, "signal/format_reward/group_std_mean": 0.0024552317336201668, "signal/format_reward/group_zero_std_frac": 0.9861111044883728, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004204644064884633, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0004204644064884633, "signal/mean_confidence_reward/centered_abs_mean": 0.05817870870232582, "signal/mean_confidence_reward/group_std_mean": 0.07863243967294693, "signal/mean_confidence_reward/group_zero_std_frac": 0.3000000059604645, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.817870373903134e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.817870373903134e-07, "step": 330 }, { "calibration/aurc": 0.11525137357263186, "calibration/batch_distribution_entropy": 0.5396938674173277, "calibration/batch_entropy_100bins": 0.2985571907929471, "calibration/batch_entropy_10bins": 0.5396938674173277, "calibration/batch_entropy_50bins": 0.3514566943865821, "calibration/batch_uniqueness": 0.2981337965761003, "calibration/confidence_entropy": 0.45188166379727307, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.07864583333333333, "calibration/coverage@10%": 0.5964172479255806, "calibration/coverage@15%": 0.7365567714242751, "calibration/coverage@20%": 0.7610576961414205, "calibration/coverage@25%": 0.794422306672317, "calibration/coverage@30%": 0.8849982229026834, "calibration/coverage@5%": 0.442828002610966, "calibration/distribution_entropy_10": 0.5396938674173277, "calibration/distribution_entropy_100": 0.2985571907929471, "calibration/ece": 0.10572534690420521, "calibration/mean_confidence": 0.7414600043459145, "calibration/unique_confidence_per_question": 0.020833333333333332, "calibration/unique_confidences": 8.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0005208333333333482, "completions/max_length": 3505.2, "completions/max_terminated_length": 3505.2, "completions/mean_length": 867.2281372070313, "completions/mean_terminated_length": 867.6787475585937, "completions/min_length": 60.2, "completions/min_terminated_length": 218.4, "epoch": 0.8052884615384616, "grad_norm": 0.0005073934444226325, "learning_rate": 4.236778846153847e-06, "loss": 0.0017, "num_tokens": 754332176.0, "reward": 1.2985543012619019, "reward_std": 0.11553947627544403, "rewards/accuracy_reward": 0.7366319417953491, "rewards/brier_reward": 0.8609826326370239, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9994791507720947, "rewards/mean_confidence_reward": 0.7424305438995361, "sampling/batch_mean_priority_error": 0.04359199988799283, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.49444444444444446, "sampling/error_ema_max": 0.15842704474925995, "sampling/error_ema_mean": 0.0072890852577984335, "sampling/priority_kl": 0.029999826103448868, "sampling/priority_scale": 0.7265909253852442, "sampling/prob_entropy": 10.278952598571777, "sampling/prob_max": 4.327336573624052e-05, "sampling/prob_min": 1.8324282063986174e-05, "sampling/prompt_draws_max": 5.0, "sampling/prompt_draws_mean": 0.7991999983787537, "sampling/prompt_draws_total": 23976.0, "sampling/seen_fraction": 0.607313334941864, "sampling/unseen_fraction": 0.392686665058136, "signal/accuracy_reward/centered_abs_mean": 0.11971571147441865, "signal/accuracy_reward/group_std_mean": 0.1570712596178055, "signal/accuracy_reward/group_zero_std_frac": 0.550000011920929, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05985785573720932, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05985785573720932, "signal/advantage_abs_mean": 0.0869685485959053, "signal/advantage_pre_scale_abs_mean": 0.0869685485959053, "signal/advantage_pre_scale_std": 0.16786086559295654, "signal/advantage_std": 0.16786086559295654, "signal/brier_reward/centered_abs_mean": 0.07740350812673569, "signal/brier_reward/group_std_mean": 0.10209325551986695, "signal/brier_reward/group_zero_std_frac": 0.1388888880610466, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.038701754063367844, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.038701754063367844, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.0010091145755723118, "signal/format_reward/group_std_mean": 0.0029462780803442, "signal/format_reward/group_zero_std_frac": 0.9833333253860473, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0005045572877861559, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0005045572877861559, "signal/mean_confidence_reward/centered_abs_mean": 0.0635205015540123, "signal/mean_confidence_reward/group_std_mean": 0.08177418410778045, "signal/mean_confidence_reward/group_zero_std_frac": 0.14444444328546524, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.352049808810988e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.352049808810988e-07, "step": 335 }, { "calibration/aurc": 0.12451635772769279, "calibration/batch_distribution_entropy": 0.6427127041171415, "calibration/batch_entropy_100bins": 0.3597033637976214, "calibration/batch_entropy_10bins": 0.6427127041171415, "calibration/batch_entropy_50bins": 0.42343697991089424, "calibration/batch_uniqueness": 0.46505167498570865, "calibration/confidence_entropy": 0.4800151184802828, "calibration/coverage@0%": 0.1671875, "calibration/coverage@1%": 0.1671875, "calibration/coverage@10%": 0.5347398816521537, "calibration/coverage@15%": 0.6217899470971798, "calibration/coverage@20%": 0.7536695706829066, "calibration/coverage@25%": 0.8006331193981563, "calibration/coverage@30%": 0.8701068237689045, "calibration/coverage@5%": 0.43518436245494607, "calibration/distribution_entropy_10": 0.6427127041171415, "calibration/distribution_entropy_100": 0.3597033637976214, "calibration/ece": 0.14596912037100324, "calibration/mean_confidence": 0.7001734506787146, "calibration/unique_confidence_per_question": 0.02239583333333333, "calibration/unique_confidences": 8.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0009548611111111161, "completions/max_length": 3395.2, "completions/max_terminated_length": 3395.2, "completions/mean_length": 935.81875, "completions/mean_terminated_length": 936.7029907226563, "completions/min_length": 64.6, "completions/min_terminated_length": 292.2, "epoch": 0.8173076923076923, "grad_norm": 0.0005600933800451458, "learning_rate": 4.20673076923077e-06, "loss": 0.002, "num_tokens": 768201608.0, "reward": 1.278798723220825, "reward_std": 0.10951859503984451, "rewards/accuracy_reward": 0.7026041626930237, "rewards/brier_reward": 0.8559340238571167, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9990451335906982, "rewards/mean_confidence_reward": 0.7040451526641845, "sampling/batch_mean_priority_error": 0.038162505133661886, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.48611111111111105, "sampling/error_ema_max": 0.16249588131904602, "sampling/error_ema_mean": 0.007329526357352734, "sampling/priority_kl": 0.02999986633658409, "sampling/priority_scale": 0.7247206031577662, "sampling/prob_entropy": 10.278953361511231, "sampling/prob_max": 4.33936613262631e-05, "sampling/prob_min": 1.8415910381008872e-05, "sampling/prompt_draws_max": 5.0, "sampling/prompt_draws_mean": 0.8111999988555908, "sampling/prompt_draws_total": 24336.0, "sampling/seen_fraction": 0.6130200028419495, "sampling/unseen_fraction": 0.38697999715805054, "signal/accuracy_reward/centered_abs_mean": 0.11417100727558135, "signal/accuracy_reward/group_std_mean": 0.1579512119293213, "signal/accuracy_reward/group_zero_std_frac": 0.5194444596767426, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05708550363779068, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05708550363779068, "signal/advantage_abs_mean": 0.0791636049747467, "signal/advantage_pre_scale_abs_mean": 0.0791636049747467, "signal/advantage_pre_scale_std": 0.15533441305160522, "signal/advantage_std": 0.15533441305160522, "signal/brier_reward/centered_abs_mean": 0.0714503824710846, "signal/brier_reward/group_std_mean": 0.09577226787805557, "signal/brier_reward/group_zero_std_frac": 0.12222222238779068, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0357251912355423, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.0357251912355423, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.0018391926889307797, "signal/format_reward/group_std_mean": 0.0051025690510869024, "signal/format_reward/group_zero_std_frac": 0.9722222208976745, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0009195963444653899, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0009195963444653899, "signal/mean_confidence_reward/centered_abs_mean": 0.06465005502104759, "signal/mean_confidence_reward/group_std_mean": 0.0848843514919281, "signal/mean_confidence_reward/group_zero_std_frac": 0.13055555820465087, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.465005753852893e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.465005753852893e-07, "step": 340 }, { "calibration/aurc": 0.09457681193417841, "calibration/batch_distribution_entropy": 0.7022329895287385, "calibration/batch_entropy_100bins": 0.40857958618301726, "calibration/batch_entropy_10bins": 0.7022329895287385, "calibration/batch_entropy_50bins": 0.4809732780923296, "calibration/batch_uniqueness": 0.6365140520714129, "calibration/confidence_entropy": 0.5093651656548829, "calibration/coverage@0%": 0.33503605820746946, "calibration/coverage@1%": 0.36760606769325693, "calibration/coverage@10%": 0.5745122571916504, "calibration/coverage@15%": 0.6721027239758789, "calibration/coverage@20%": 0.9057291666666668, "calibration/coverage@25%": 0.9255208333333332, "calibration/coverage@30%": 0.9442708333333332, "calibration/coverage@5%": 0.5033105497188031, "calibration/distribution_entropy_10": 0.7022329895287385, "calibration/distribution_entropy_100": 0.40857958618301726, "calibration/ece": 0.1802956842027404, "calibration/mean_confidence": 0.6166906055436112, "calibration/unique_confidence_per_question": 0.022916666666666665, "calibration/unique_confidences": 8.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.002430555555555558, "completions/max_length": 3666.2, "completions/max_terminated_length": 3666.2, "completions/mean_length": 917.8632080078125, "completions/mean_terminated_length": 920.127734375, "completions/min_length": 0.0, "completions/min_terminated_length": 253.2, "epoch": 0.8293269230769231, "grad_norm": 0.000359359139110893, "learning_rate": 4.176682692307693e-06, "loss": -0.0005, "num_tokens": 781840704.0, "reward": 1.2887306213378906, "reward_std": 0.09829263091087341, "rewards/accuracy_reward": 0.7401041626930237, "rewards/brier_reward": 0.8397751927375794, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.997569465637207, "rewards/mean_confidence_reward": 0.6232031345367431, "sampling/batch_mean_priority_error": 0.037444839205341335, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.4916666666666667, "sampling/error_ema_max": 0.16249588131904602, "sampling/error_ema_mean": 0.007368226815015077, "sampling/priority_kl": 0.029999981075525282, "sampling/priority_scale": 0.7232257783180103, "sampling/prob_entropy": 10.278951454162598, "sampling/prob_max": 4.352282339823432e-05, "sampling/prob_min": 1.8503407045500352e-05, "sampling/prompt_draws_max": 5.0, "sampling/prompt_draws_mean": 0.823200011253357, "sampling/prompt_draws_total": 24696.0, "sampling/seen_fraction": 0.6190266728401184, "sampling/unseen_fraction": 0.3809733271598816, "signal/accuracy_reward/centered_abs_mean": 0.12099609225988388, "signal/accuracy_reward/group_std_mean": 0.16088730096817017, "signal/accuracy_reward/group_zero_std_frac": 0.5361111283302307, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06049804612994194, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06049804612994194, "signal/advantage_abs_mean": 0.07144818976521491, "signal/advantage_pre_scale_abs_mean": 0.07144818976521491, "signal/advantage_pre_scale_std": 0.1398210719227791, "signal/advantage_std": 0.1398210719227791, "signal/brier_reward/centered_abs_mean": 0.06847175061702729, "signal/brier_reward/group_std_mean": 0.09024935513734818, "signal/brier_reward/group_zero_std_frac": 0.09444444626569748, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.034235875308513644, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.034235875308513644, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.004589843703433872, "signal/format_reward/group_std_mean": 0.010953563451766967, "signal/format_reward/group_zero_std_frac": 0.9472222328186035, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.002294921851716936, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.002294921851716936, "signal/mean_confidence_reward/centered_abs_mean": 0.06193251013755798, "signal/mean_confidence_reward/group_std_mean": 0.08066735863685608, "signal/mean_confidence_reward/group_zero_std_frac": 0.10555555671453476, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.193250669639383e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.193250669639383e-07, "step": 345 }, { "calibration/aurc": 0.15021197683923637, "calibration/batch_distribution_entropy": 0.6270485825789845, "calibration/batch_entropy_100bins": 0.3587935523162376, "calibration/batch_entropy_10bins": 0.6270485825789845, "calibration/batch_entropy_50bins": 0.42236596455563563, "calibration/batch_uniqueness": 0.46232869790464354, "calibration/confidence_entropy": 0.4789339090468678, "calibration/coverage@0%": 0.05759162303664922, "calibration/coverage@1%": 0.10575916230366492, "calibration/coverage@10%": 0.34087601999905665, "calibration/coverage@15%": 0.49549831081081075, "calibration/coverage@20%": 0.7307291666666667, "calibration/coverage@25%": 0.8890625, "calibration/coverage@30%": 0.9, "calibration/coverage@5%": 0.22252440922597988, "calibration/distribution_entropy_10": 0.6270485825789845, "calibration/distribution_entropy_100": 0.3587935523162376, "calibration/ece": 0.11578408889910854, "calibration/mean_confidence": 0.6800286469388237, "calibration/unique_confidence_per_question": 0.02239583333333333, "calibration/unique_confidences": 8.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00321180555555558, "completions/max_length": 3813.4, "completions/max_terminated_length": 3813.4, "completions/mean_length": 950.54541015625, "completions/mean_terminated_length": 953.6885009765625, "completions/min_length": 0.0, "completions/min_terminated_length": 270.0, "epoch": 0.8413461538461539, "grad_norm": 0.0005441403482109308, "learning_rate": 4.146634615384616e-06, "loss": -0.0012, "num_tokens": 795912747.0, "reward": 1.2941713333129883, "reward_std": 0.11140554696321488, "rewards/accuracy_reward": 0.7353298664093018, "rewards/brier_reward": 0.8562109470367432, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9967881917953492, "rewards/mean_confidence_reward": 0.6828558921813965, "sampling/batch_mean_priority_error": 0.04163396657615974, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.4722222222222222, "sampling/error_ema_max": 0.16249588131904602, "sampling/error_ema_mean": 0.007407169230282307, "sampling/priority_kl": 0.03000009208917618, "sampling/priority_scale": 0.7217172205680982, "sampling/prob_entropy": 10.278950309753418, "sampling/prob_max": 4.364752385299653e-05, "sampling/prob_min": 1.8589558385428974e-05, "sampling/prompt_draws_max": 5.0, "sampling/prompt_draws_mean": 0.8351999878883362, "sampling/prompt_draws_total": 25056.0, "sampling/seen_fraction": 0.6247599959373474, "sampling/unseen_fraction": 0.37524000406265257, "signal/accuracy_reward/centered_abs_mean": 0.12787000387907027, "signal/accuracy_reward/group_std_mean": 0.1749707505106926, "signal/accuracy_reward/group_zero_std_frac": 0.48611111044883726, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06393500193953514, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06393500193953514, "signal/advantage_abs_mean": 0.0786064624786377, "signal/advantage_pre_scale_abs_mean": 0.0786064624786377, "signal/advantage_pre_scale_std": 0.15402153134346008, "signal/advantage_std": 0.15402153134346008, "signal/brier_reward/centered_abs_mean": 0.06964456140995026, "signal/brier_reward/group_std_mean": 0.09630894064903259, "signal/brier_reward/group_zero_std_frac": 0.16111110914498566, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03482228070497513, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03482228070497513, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.005126953125, "signal/format_reward/group_std_mean": 0.010197317413985729, "signal/format_reward/group_zero_std_frac": 0.9555555582046509, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0025634765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0025634765625, "signal/mean_confidence_reward/centered_abs_mean": 0.0615044504404068, "signal/mean_confidence_reward/group_std_mean": 0.0810565173625946, "signal/mean_confidence_reward/group_zero_std_frac": 0.18333333767950535, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.150444505692576e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.150444505692576e-07, "step": 350 }, { "epoch": 0.8413461538461539, "eval_calibration/aurc": 0.14071806497163786, "eval_calibration/batch_distribution_entropy": 0.5927960453163179, "eval_calibration/batch_entropy_100bins": 0.3351960947265209, "eval_calibration/batch_entropy_10bins": 0.5927960453163179, "eval_calibration/batch_entropy_50bins": 0.3945874192846862, "eval_calibration/batch_uniqueness": 0.32732551984877123, "eval_calibration/confidence_entropy": 0.44637156937290573, "eval_calibration/coverage@0%": 0.0, "eval_calibration/coverage@1%": 0.0, "eval_calibration/coverage@10%": 0.551304347826087, "eval_calibration/coverage@15%": 0.697391304347826, "eval_calibration/coverage@20%": 0.7521739130434782, "eval_calibration/coverage@25%": 0.8704347826086957, "eval_calibration/coverage@30%": 1.0, "eval_calibration/coverage@5%": 0.0, "eval_calibration/distribution_entropy_10": 0.5927960453163179, "eval_calibration/distribution_entropy_100": 0.3351960947265209, "eval_calibration/ece": 0.044521739130434675, "eval_calibration/mean_confidence": 0.7302608695652174, "eval_calibration/unique_confidence_per_question": 0.0078125, "eval_calibration/unique_confidences": 9, "eval_completions/clipped_ratio": 0.0017361111111111234, "eval_completions/max_length": 2879.6666666666665, "eval_completions/max_terminated_length": 2879.6666666666665, "eval_completions/mean_length": 956.6055704752604, "eval_completions/mean_terminated_length": 958.2885335286459, "eval_completions/min_length": 192.0, "eval_completions/min_terminated_length": 286.3333333333333, "eval_loss": 0.0, "eval_num_tokens": 795912747.0, "eval_reward": 1.2779152592023213, "eval_reward_std": 0.3102490504582723, "eval_rewards/accuracy_reward": 0.7022569477558136, "eval_rewards/brier_reward": 0.8552951316038767, "eval_rewards/confidence_one_or_zero": 0.0, "eval_rewards/format_reward": 0.9982638955116272, "eval_rewards/mean_confidence_reward": 0.7289930184682211, "eval_runtime": 167.9151, "eval_samples_per_second": 5.955, "eval_signal/accuracy_reward/centered_abs_mean": 0.4070638070503871, "eval_signal/accuracy_reward/group_std_mean": 0.45701082050800323, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.20353190352519354, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.20353190352519354, "eval_signal/advantage_abs_mean": 0.2656337320804596, "eval_signal/advantage_pre_scale_abs_mean": 0.2656337320804596, "eval_signal/advantage_pre_scale_std": 0.3075862228870392, "eval_signal/advantage_std": 0.3075862228870392, "eval_signal/brier_reward/centered_abs_mean": 0.16687446584304175, "eval_signal/brier_reward/group_std_mean": 0.22345413515965143, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.08343723292152087, "eval_signal/brier_reward/weight": 0.5, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.08343723292152087, "eval_signal/confidence_one_or_zero/centered_abs_mean": 0.0, "eval_signal/confidence_one_or_zero/group_std_mean": 0.0, "eval_signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "eval_signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "eval_signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/centered_abs_mean": 0.0033637151742974916, "eval_signal/format_reward/group_std_mean": 0.009820927555362383, "eval_signal/format_reward/group_zero_std_frac": 0.944444457689921, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0016818575871487458, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0016818575871487458, "eval_signal/mean_confidence_reward/centered_abs_mean": 0.19890405982732773, "eval_signal/mean_confidence_reward/group_std_mean": 0.2373119369149208, "eval_signal/mean_confidence_reward/group_zero_std_frac": 0.0, "eval_signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 1.9890404511594775e-06, "eval_signal/mean_confidence_reward/weight": 9.999999747378752e-06, "eval_signal/mean_confidence_reward/weighted_centered_abs_mean": 1.9890404511594775e-06, "eval_steps_per_second": 0.036, "step": 350 }, { "epoch": 0.8413461538461539, "step": 350, "train_probe_calibration/aurc": 0.12366766369114877, "train_probe_calibration/batch_distribution_entropy": 0.5780326992172357, "train_probe_calibration/batch_entropy_100bins": 0.32548332021851784, "train_probe_calibration/batch_entropy_10bins": 0.5780326992172357, "train_probe_calibration/batch_entropy_50bins": 0.3831536982852399, "train_probe_calibration/batch_uniqueness": 0.29336063220676245, "train_probe_calibration/confidence_entropy": 0.44255715769472015, "train_probe_calibration/coverage@0%": 0.0, "train_probe_calibration/coverage@1%": 0.0, "train_probe_calibration/coverage@10%": 0.5683202785030461, "train_probe_calibration/coverage@15%": 0.7745865970409052, "train_probe_calibration/coverage@20%": 0.8772845953002611, "train_probe_calibration/coverage@25%": 0.9530026109660574, "train_probe_calibration/coverage@30%": 1.0, "train_probe_calibration/coverage@5%": 0.0, "train_probe_calibration/distribution_entropy_10": 0.5780326992172357, "train_probe_calibration/distribution_entropy_100": 0.32548332021851784, "train_probe_calibration/ece": 0.0268929503916449, "train_probe_calibration/mean_confidence": 0.7407310704960837, "train_probe_calibration/unique_confidence_per_question": 0.0078125, "train_probe_calibration/unique_confidences": 9, "train_probe_completions/clipped_ratio": 0.002604166666666685, "train_probe_completions/max_length": 2846.6666666666665, "train_probe_completions/max_terminated_length": 2846.6666666666665, "train_probe_completions/mean_length": 952.1272786458334, "train_probe_completions/mean_terminated_length": 954.6122334798177, "train_probe_completions/min_length": 120.83333333333333, "train_probe_completions/min_terminated_length": 213.16666666666666, "train_probe_loss": 0.0, "train_probe_num_tokens": 795912747.0, "train_probe_reward": 1.2993086377779643, "train_probe_reward_std": 0.3012443433205287, "train_probe_rewards/accuracy_reward": 0.7413194378217062, "train_probe_rewards/brier_reward": 0.8598871727784475, "train_probe_rewards/confidence_one_or_zero": 0.0, "train_probe_rewards/format_reward": 0.9973958432674408, "train_probe_rewards/mean_confidence_reward": 0.7388020753860474, "train_probe_runtime": 184.2927, "train_probe_samples_per_second": 5.426, "train_probe_signal/accuracy_reward/centered_abs_mean": 0.3741319427887599, "train_probe_signal/accuracy_reward/group_std_mean": 0.4373552103837331, "train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0, "train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.18706597139437994, "train_probe_signal/accuracy_reward/weight": 0.5, "train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.18706597139437994, "train_probe_signal/advantage_abs_mean": 0.2482676605383555, "train_probe_signal/advantage_pre_scale_abs_mean": 0.2482676605383555, "train_probe_signal/advantage_pre_scale_std": 0.29956333835919696, "train_probe_signal/advantage_std": 0.29956333835919696, "train_probe_signal/brier_reward/centered_abs_mean": 0.16151477148135504, "train_probe_signal/brier_reward/group_std_mean": 0.22173232833544412, "train_probe_signal/brier_reward/group_zero_std_frac": 0.0, "train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.08075738574067752, "train_probe_signal/brier_reward/weight": 0.5, "train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.08075738574067752, "train_probe_signal/confidence_one_or_zero/centered_abs_mean": 0.0, "train_probe_signal/confidence_one_or_zero/group_std_mean": 0.0, "train_probe_signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "train_probe_signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "train_probe_signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "train_probe_signal/format_reward/centered_abs_mean": 0.0050455727614462376, "train_probe_signal/format_reward/group_std_mean": 0.014731391333043575, "train_probe_signal/format_reward/group_zero_std_frac": 0.9166666865348816, "train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0025227863807231188, "train_probe_signal/format_reward/weight": 0.5, "train_probe_signal/format_reward/weighted_centered_abs_mean": 0.0025227863807231188, "train_probe_signal/mean_confidence_reward/centered_abs_mean": 0.1901095782717069, "train_probe_signal/mean_confidence_reward/group_std_mean": 0.22979472080866495, "train_probe_signal/mean_confidence_reward/group_zero_std_frac": 0.0, "train_probe_signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 1.9010956862075545e-06, "train_probe_signal/mean_confidence_reward/weight": 9.999999747378752e-06, "train_probe_signal/mean_confidence_reward/weighted_centered_abs_mean": 1.9010956862075545e-06, "train_probe_steps_per_second": 0.033 }, { "calibration/aurc": 0.17479328506484096, "calibration/batch_distribution_entropy": 0.49078581596270715, "calibration/batch_entropy_100bins": 0.2738594645868757, "calibration/batch_entropy_10bins": 0.49078581596270715, "calibration/batch_entropy_50bins": 0.32238293070265683, "calibration/batch_uniqueness": 0.10383284877673402, "calibration/confidence_entropy": 0.42451245543918326, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.22788842425787253, "calibration/coverage@15%": 0.4377930601013447, "calibration/coverage@20%": 0.6457713541939623, "calibration/coverage@25%": 0.6966080957945986, "calibration/coverage@30%": 0.924549551637301, "calibration/coverage@5%": 0.10757180156657964, "calibration/distribution_entropy_10": 0.49078581596270715, "calibration/distribution_entropy_100": 0.2738594645868757, "calibration/ece": 0.1168297092438673, "calibration/mean_confidence": 0.7653914820591261, "calibration/unique_confidence_per_question": 0.021354166666666667, "calibration/unique_confidences": 8.2, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0038194444444444643, "completions/max_length": 3793.2, "completions/max_terminated_length": 3793.2, "completions/mean_length": 995.2534790039062, "completions/mean_terminated_length": 999.286328125, "completions/min_length": 0.0, "completions/min_terminated_length": 243.8, "epoch": 0.8533653846153846, "grad_norm": 0.0005921661504544318, "learning_rate": 4.116586538461539e-06, "loss": -0.0041, "num_tokens": 810478099.0, "reward": 1.2731634616851806, "reward_std": 0.13975580632686616, "rewards/accuracy_reward": 0.7113715291023255, "rewards/brier_reward": 0.8388463497161865, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.99609375, "rewards/mean_confidence_reward": 0.7715624809265137, "sampling/batch_mean_priority_error": 0.06671788019592899, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.5166666666666666, "sampling/error_ema_max": 0.16249588131904602, "sampling/error_ema_mean": 0.007469820789992809, "sampling/priority_kl": 0.030000051110982896, "sampling/priority_scale": 0.7204637587303295, "sampling/prob_entropy": 10.278950500488282, "sampling/prob_max": 4.3779354746220635e-05, "sampling/prob_min": 1.8673363229027018e-05, "sampling/prompt_draws_max": 5.0, "sampling/prompt_draws_mean": 0.8472000002861023, "sampling/prompt_draws_total": 25416.0, "sampling/seen_fraction": 0.6306733250617981, "sampling/unseen_fraction": 0.3693266749382019, "signal/accuracy_reward/centered_abs_mean": 0.13954535275697708, "signal/accuracy_reward/group_std_mean": 0.18298389613628388, "signal/accuracy_reward/group_zero_std_frac": 0.4777777850627899, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06977267637848854, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06977267637848854, "signal/advantage_abs_mean": 0.10368146002292633, "signal/advantage_pre_scale_abs_mean": 0.10368146002292633, "signal/advantage_pre_scale_std": 0.1949189454317093, "signal/advantage_std": 0.1949189454317093, "signal/brier_reward/centered_abs_mean": 0.08842111974954606, "signal/brier_reward/group_std_mean": 0.11987781524658203, "signal/brier_reward/group_zero_std_frac": 0.32500000298023224, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04421055987477303, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.04421055987477303, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.007123480970039964, "signal/format_reward/group_std_mean": 0.014730273373425007, "signal/format_reward/group_zero_std_frac": 0.9361111164093018, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.003561740485019982, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.003561740485019982, "signal/mean_confidence_reward/centered_abs_mean": 0.05848633646965027, "signal/mean_confidence_reward/group_std_mean": 0.07816922888159752, "signal/mean_confidence_reward/group_zero_std_frac": 0.3833333432674408, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.848633236382739e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.848633236382739e-07, "step": 355 }, { "calibration/aurc": 0.14227408809205797, "calibration/batch_distribution_entropy": 0.3833968902727182, "calibration/batch_entropy_100bins": 0.20720136062897182, "calibration/batch_entropy_10bins": 0.3833968902727182, "calibration/batch_entropy_50bins": 0.24391408924249838, "calibration/batch_uniqueness": -0.17578774698721988, "calibration/confidence_entropy": 0.39237565439323796, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.5404726936466492, "calibration/coverage@15%": 0.5415143603133159, "calibration/coverage@20%": 0.7298369688123187, "calibration/coverage@25%": 0.9333810116871298, "calibration/coverage@30%": 0.9884514435695537, "calibration/coverage@5%": 0.184375, "calibration/distribution_entropy_10": 0.3833968902727182, "calibration/distribution_entropy_100": 0.20720136062897182, "calibration/ece": 0.09318675761811303, "calibration/mean_confidence": 0.822795640520307, "calibration/unique_confidence_per_question": 0.020833333333333332, "calibration/unique_confidences": 8.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.005642361111111116, "completions/max_length": 3793.2, "completions/max_terminated_length": 3793.2, "completions/mean_length": 953.820751953125, "completions/mean_terminated_length": 959.3001342773438, "completions/min_length": 0.0, "completions/min_terminated_length": 260.6, "epoch": 0.8653846153846154, "grad_norm": 0.0006347987800836563, "learning_rate": 4.086538461538462e-06, "loss": -0.0045, "num_tokens": 824565474.0, "reward": 1.2802110195159913, "reward_std": 0.12795323729515076, "rewards/accuracy_reward": 0.7278645873069763, "rewards/brier_reward": 0.8384444713592529, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9940972208976746, "rewards/mean_confidence_reward": 0.7921961784362793, "sampling/batch_mean_priority_error": 0.07182012144971406, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.47777777777777775, "sampling/error_ema_max": 0.16249588131904602, "sampling/error_ema_mean": 0.007543521653860808, "sampling/priority_kl": 0.029999841749668122, "sampling/priority_scale": 0.7193483173148707, "sampling/prob_entropy": 10.278953170776367, "sampling/prob_max": 4.391378824948333e-05, "sampling/prob_min": 1.8755430210148917e-05, "sampling/prompt_draws_max": 5.0, "sampling/prompt_draws_mean": 0.8592000007629395, "sampling/prompt_draws_total": 25776.0, "sampling/seen_fraction": 0.6365933299064637, "sampling/unseen_fraction": 0.3634066700935364, "signal/accuracy_reward/centered_abs_mean": 0.11312391310930252, "signal/accuracy_reward/group_std_mean": 0.15549919307231902, "signal/accuracy_reward/group_zero_std_frac": 0.5277777910232544, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05656195655465126, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05656195655465126, "signal/advantage_abs_mean": 0.091581329703331, "signal/advantage_pre_scale_abs_mean": 0.091581329703331, "signal/advantage_pre_scale_std": 0.1872180163860321, "signal/advantage_std": 0.1872180163860321, "signal/brier_reward/centered_abs_mean": 0.08145019859075546, "signal/brier_reward/group_std_mean": 0.11254193782806396, "signal/brier_reward/group_zero_std_frac": 0.3555555582046509, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04072509929537773, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.04072509929537773, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.009537760308012367, "signal/format_reward/group_std_mean": 0.01679510176181793, "signal/format_reward/group_zero_std_frac": 0.9333333611488343, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.004768880154006183, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.004768880154006183, "signal/mean_confidence_reward/centered_abs_mean": 0.053370780497789386, "signal/mean_confidence_reward/group_std_mean": 0.0711805447936058, "signal/mean_confidence_reward/group_zero_std_frac": 0.44722222685813906, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.337078050615673e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.337078050615673e-07, "step": 360 }, { "calibration/aurc": 0.140028992425538, "calibration/batch_distribution_entropy": 0.3930293886864735, "calibration/batch_entropy_100bins": 0.2175984067067936, "calibration/batch_entropy_10bins": 0.3930293886864735, "calibration/batch_entropy_50bins": 0.2561533236625141, "calibration/batch_uniqueness": -0.08713060293914288, "calibration/confidence_entropy": 0.40117444762509863, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.4989583333333333, "calibration/coverage@15%": 0.6760416666666667, "calibration/coverage@20%": 0.7057291666666666, "calibration/coverage@25%": 0.7270833333333333, "calibration/coverage@30%": 0.9098169604003481, "calibration/coverage@5%": 0.2635416666666667, "calibration/distribution_entropy_10": 0.3930293886864735, "calibration/distribution_entropy_100": 0.2175984067067936, "calibration/ece": 0.11969225957354215, "calibration/mean_confidence": 0.7895940763707573, "calibration/unique_confidence_per_question": 0.019791666666666666, "calibration/unique_confidences": 7.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.004166666666666674, "completions/max_length": 3381.4, "completions/max_terminated_length": 3381.4, "completions/mean_length": 917.1393310546875, "completions/mean_terminated_length": 921.1011962890625, "completions/min_length": 0.0, "completions/min_terminated_length": 236.0, "epoch": 0.8774038461538461, "grad_norm": 0.000542930094525218, "learning_rate": 4.0564903846153846e-06, "loss": -0.0015, "num_tokens": 838224583.0, "reward": 1.2964860677719117, "reward_std": 0.10884315818548203, "rewards/accuracy_reward": 0.7424479007720948, "rewards/brier_reward": 0.8546753764152527, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9958333373069763, "rewards/mean_confidence_reward": 0.7798090219497681, "sampling/batch_mean_priority_error": 0.060249924041115656, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.48888888888888893, "sampling/error_ema_max": 0.16249588131904602, "sampling/error_ema_mean": 0.007615257333964109, "sampling/priority_kl": 0.030000029504299162, "sampling/priority_scale": 0.7183378756279126, "sampling/prob_entropy": 10.278955459594727, "sampling/prob_max": 4.404875508043915e-05, "sampling/prob_min": 1.8835568334907292e-05, "sampling/prompt_draws_max": 5.0, "sampling/prompt_draws_mean": 0.8712000012397766, "sampling/prompt_draws_total": 26136.0, "sampling/seen_fraction": 0.6424266695976257, "sampling/unseen_fraction": 0.35757333040237427, "signal/accuracy_reward/centered_abs_mean": 0.09465603232383728, "signal/accuracy_reward/group_std_mean": 0.13468586504459382, "signal/accuracy_reward/group_zero_std_frac": 0.5833333253860473, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04732801616191864, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04732801616191864, "signal/advantage_abs_mean": 0.07618805170059204, "signal/advantage_pre_scale_abs_mean": 0.07618805170059204, "signal/advantage_pre_scale_std": 0.16468151807785034, "signal/advantage_std": 0.16468151807785034, "signal/brier_reward/centered_abs_mean": 0.07008838132023812, "signal/brier_reward/group_std_mean": 0.0985906183719635, "signal/brier_reward/group_zero_std_frac": 0.35, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03504419066011906, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03504419066011906, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.006065538234543056, "signal/format_reward/group_std_mean": 0.010566732659935952, "signal/format_reward/group_zero_std_frac": 0.9583333373069763, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.003032769117271528, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.003032769117271528, "signal/mean_confidence_reward/centered_abs_mean": 0.05134060978889465, "signal/mean_confidence_reward/group_std_mean": 0.06885762214660644, "signal/mean_confidence_reward/group_zero_std_frac": 0.41666666865348817, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.134061041189852e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.134061041189852e-07, "step": 365 }, { "calibration/aurc": 0.1258378555398733, "calibration/batch_distribution_entropy": 0.46751652925453246, "calibration/batch_entropy_100bins": 0.2515257216364021, "calibration/batch_entropy_10bins": 0.46751652925453246, "calibration/batch_entropy_50bins": 0.29609200985829276, "calibration/batch_uniqueness": 0.07823023946937402, "calibration/confidence_entropy": 0.4175166052930705, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.10628272251308901, "calibration/coverage@10%": 0.285970222513089, "calibration/coverage@15%": 0.675792700721104, "calibration/coverage@20%": 0.8834558556894002, "calibration/coverage@25%": 0.971641380826737, "calibration/coverage@30%": 1.0, "calibration/coverage@5%": 0.27190772251308903, "calibration/distribution_entropy_10": 0.46751652925453246, "calibration/distribution_entropy_100": 0.2515257216364021, "calibration/ece": 0.09827271825945014, "calibration/mean_confidence": 0.7923587312701746, "calibration/unique_confidence_per_question": 0.020312499999999997, "calibration/unique_confidences": 7.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.003385416666666652, "completions/max_length": 3863.6, "completions/max_terminated_length": 3863.6, "completions/mean_length": 915.6849975585938, "completions/mean_terminated_length": 918.7907958984375, "completions/min_length": 0.0, "completions/min_terminated_length": 253.0, "epoch": 0.8894230769230769, "grad_norm": 0.0006251902668736875, "learning_rate": 4.026442307692308e-06, "loss": -0.0026, "num_tokens": 851860378.0, "reward": 1.2964604139328002, "reward_std": 0.12138742804527283, "rewards/accuracy_reward": 0.7434895753860473, "rewards/brier_reward": 0.852888023853302, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9965277791023255, "rewards/mean_confidence_reward": 0.7711111187934876, "sampling/batch_mean_priority_error": 0.056980975083554763, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.5111111111111111, "sampling/error_ema_max": 0.16249588131904602, "sampling/error_ema_mean": 0.007679272443056107, "sampling/priority_kl": 0.029999953880906104, "sampling/priority_scale": 0.7177827418083325, "sampling/prob_entropy": 10.278951263427734, "sampling/prob_max": 4.419601973495446e-05, "sampling/prob_min": 1.891095052997116e-05, "sampling/prompt_draws_max": 5.0, "sampling/prompt_draws_mean": 0.8832000017166137, "sampling/prompt_draws_total": 26496.0, "sampling/seen_fraction": 0.6486466765403748, "sampling/unseen_fraction": 0.3513533234596252, "signal/accuracy_reward/centered_abs_mean": 0.118115234375, "signal/accuracy_reward/group_std_mean": 0.15658425986766816, "signal/accuracy_reward/group_zero_std_frac": 0.55277778506279, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0590576171875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0590576171875, "signal/advantage_abs_mean": 0.08963771909475327, "signal/advantage_pre_scale_abs_mean": 0.08963771909475327, "signal/advantage_pre_scale_std": 0.18096743822097777, "signal/advantage_std": 0.18096743822097777, "signal/brier_reward/centered_abs_mean": 0.07543071657419205, "signal/brier_reward/group_std_mean": 0.10182640105485916, "signal/brier_reward/group_zero_std_frac": 0.33888888657093047, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.037715358287096025, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.037715358287096025, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.006477864552289247, "signal/format_reward/group_std_mean": 0.013997751846909523, "signal/format_reward/group_zero_std_frac": 0.9388888955116272, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0032389322761446236, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0032389322761446236, "signal/mean_confidence_reward/centered_abs_mean": 0.05826389938592911, "signal/mean_confidence_reward/group_std_mean": 0.07576072439551354, "signal/mean_confidence_reward/group_zero_std_frac": 0.38333333730697633, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.826389838148316e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.826389838148316e-07, "step": 370 }, { "calibration/aurc": 0.09348026206368412, "calibration/batch_distribution_entropy": 0.588975726328109, "calibration/batch_entropy_100bins": 0.3265009176487136, "calibration/batch_entropy_10bins": 0.588975726328109, "calibration/batch_entropy_50bins": 0.3843515975154779, "calibration/batch_uniqueness": 0.40700246648161914, "calibration/confidence_entropy": 0.4716141812235014, "calibration/coverage@0%": 0.09712793733681462, "calibration/coverage@1%": 0.2689295039164491, "calibration/coverage@10%": 0.6583557606432182, "calibration/coverage@15%": 0.7130799773534009, "calibration/coverage@20%": 0.803554878132134, "calibration/coverage@25%": 0.8672546039351314, "calibration/coverage@30%": 0.9305590323819029, "calibration/coverage@5%": 0.6009145073795106, "calibration/distribution_entropy_10": 0.588975726328109, "calibration/distribution_entropy_100": 0.3265009176487136, "calibration/ece": 0.12552451354808858, "calibration/mean_confidence": 0.7361619589251296, "calibration/unique_confidence_per_question": 0.0203125, "calibration/unique_confidences": 7.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0028645833333333258, "completions/max_length": 3791.2, "completions/max_terminated_length": 3791.2, "completions/mean_length": 902.0409790039063, "completions/mean_terminated_length": 904.7165649414062, "completions/min_length": 0.0, "completions/min_terminated_length": 196.6, "epoch": 0.9014423076923077, "grad_norm": 0.0006674884352833033, "learning_rate": 3.996394230769231e-06, "loss": -0.0005, "num_tokens": 865337586.0, "reward": 1.297727370262146, "reward_std": 0.11251561045646667, "rewards/accuracy_reward": 0.7408854126930237, "rewards/brier_reward": 0.8575060844421387, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9970486044883728, "rewards/mean_confidence_reward": 0.7293142437934875, "sampling/batch_mean_priority_error": 0.04492750631761033, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.4222222222222222, "sampling/error_ema_max": 0.16249588131904602, "sampling/error_ema_mean": 0.007732342928647995, "sampling/priority_kl": 0.030000034347176553, "sampling/priority_scale": 0.7166644394630566, "sampling/prob_entropy": 10.278953170776367, "sampling/prob_max": 4.432367641129531e-05, "sampling/prob_min": 1.8990676107932813e-05, "sampling/prompt_draws_max": 5.0, "sampling/prompt_draws_mean": 0.895199990272522, "sampling/prompt_draws_total": 26856.0, "sampling/seen_fraction": 0.6539800047874451, "sampling/unseen_fraction": 0.34601999521255494, "signal/accuracy_reward/centered_abs_mean": 0.12051323503255844, "signal/accuracy_reward/group_std_mean": 0.16058450937271118, "signal/accuracy_reward/group_zero_std_frac": 0.5250000178813934, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06025661751627922, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06025661751627922, "signal/advantage_abs_mean": 0.0811893805861473, "signal/advantage_pre_scale_abs_mean": 0.0811893805861473, "signal/advantage_pre_scale_std": 0.16239385306835175, "signal/advantage_std": 0.16239385306835175, "signal/brier_reward/centered_abs_mean": 0.06375756785273552, "signal/brier_reward/group_std_mean": 0.08792267739772797, "signal/brier_reward/group_zero_std_frac": 0.1777777761220932, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03187878392636776, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03187878392636776, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.005447048670612275, "signal/format_reward/group_std_mean": 0.012354053743183612, "signal/format_reward/group_zero_std_frac": 0.9416666626930237, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0027235243353061376, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0027235243353061376, "signal/mean_confidence_reward/centered_abs_mean": 0.05573513433337211, "signal/mean_confidence_reward/group_std_mean": 0.07394854128360748, "signal/mean_confidence_reward/group_zero_std_frac": 0.1972222238779068, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.573513362833183e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.573513362833183e-07, "step": 375 }, { "calibration/aurc": 0.08495235291893864, "calibration/batch_distribution_entropy": 0.6155774549758437, "calibration/batch_entropy_100bins": 0.3351735776080944, "calibration/batch_entropy_10bins": 0.6155774549758437, "calibration/batch_entropy_50bins": 0.39456091249719843, "calibration/batch_uniqueness": 0.44102586574209024, "calibration/confidence_entropy": 0.5091412586682026, "calibration/coverage@0%": 0.0203125, "calibration/coverage@1%": 0.35401708006962573, "calibration/coverage@10%": 0.6745866402116402, "calibration/coverage@15%": 0.8016699735449734, "calibration/coverage@20%": 0.8672949735449735, "calibration/coverage@25%": 0.8896908068783069, "calibration/coverage@30%": 0.9447172619047619, "calibration/coverage@5%": 0.5850586085899402, "calibration/distribution_entropy_10": 0.6155774549758437, "calibration/distribution_entropy_100": 0.3351735776080944, "calibration/ece": 0.128370848702115, "calibration/mean_confidence": 0.7126485893530606, "calibration/unique_confidence_per_question": 0.020833333333333332, "calibration/unique_confidences": 8.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.005034722222222255, "completions/max_length": 3856.2, "completions/max_terminated_length": 3856.2, "completions/mean_length": 935.269091796875, "completions/mean_terminated_length": 940.030419921875, "completions/min_length": 0.0, "completions/min_terminated_length": 203.8, "epoch": 0.9134615384615384, "grad_norm": 0.0005975854583084583, "learning_rate": 3.966346153846154e-06, "loss": -0.004, "num_tokens": 879227438.0, "reward": 1.2894989728927613, "reward_std": 0.10981154143810272, "rewards/accuracy_reward": 0.7271701335906983, "rewards/brier_reward": 0.8570225596427917, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9947916626930237, "rewards/mean_confidence_reward": 0.6774826407432556, "sampling/batch_mean_priority_error": 0.03030392225748877, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.43888888888888894, "sampling/error_ema_max": 0.16249588131904602, "sampling/error_ema_mean": 0.007766260858625173, "sampling/priority_kl": 0.030000124126672745, "sampling/priority_scale": 0.7155722082359717, "sampling/prob_entropy": 10.278954124450683, "sampling/prob_max": 4.445084559847601e-05, "sampling/prob_min": 1.906977740873117e-05, "sampling/prompt_draws_max": 5.0, "sampling/prompt_draws_mean": 0.9072000026702881, "sampling/prompt_draws_total": 27216.0, "sampling/seen_fraction": 0.6592066645622253, "sampling/unseen_fraction": 0.3407933354377747, "signal/accuracy_reward/centered_abs_mean": 0.1240505650639534, "signal/accuracy_reward/group_std_mean": 0.16211120784282684, "signal/accuracy_reward/group_zero_std_frac": 0.544444465637207, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0620252825319767, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0620252825319767, "signal/advantage_abs_mean": 0.07988931685686111, "signal/advantage_pre_scale_abs_mean": 0.07988931685686111, "signal/advantage_pre_scale_std": 0.16046163737773894, "signal/advantage_std": 0.16046163737773894, "signal/brier_reward/centered_abs_mean": 0.06219109818339348, "signal/brier_reward/group_std_mean": 0.08544109016656876, "signal/brier_reward/group_zero_std_frac": 0.14166666567325592, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03109554909169674, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03109554909169674, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.009016927040647715, "signal/format_reward/group_std_mean": 0.016939852014184, "signal/format_reward/group_zero_std_frac": 0.9305555582046509, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.004508463520323858, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.004508463520323858, "signal/mean_confidence_reward/centered_abs_mean": 0.05640299767255783, "signal/mean_confidence_reward/group_std_mean": 0.07368175387382507, "signal/mean_confidence_reward/group_zero_std_frac": 0.15555555671453475, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.64029949146061e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.64029949146061e-07, "step": 380 }, { "calibration/aurc": 0.1405584194396033, "calibration/batch_distribution_entropy": 0.6175718779005612, "calibration/batch_entropy_100bins": 0.33658476146835625, "calibration/batch_entropy_10bins": 0.6175718779005612, "calibration/batch_entropy_50bins": 0.3962221353047351, "calibration/batch_uniqueness": 0.4358823734176201, "calibration/confidence_entropy": 0.49378913317443135, "calibration/coverage@0%": 0.05416666666666666, "calibration/coverage@1%": 0.19930191972076788, "calibration/coverage@10%": 0.3779477530541012, "calibration/coverage@15%": 0.5394169938917976, "calibration/coverage@20%": 0.6689811403237553, "calibration/coverage@25%": 0.7433357939632546, "calibration/coverage@30%": 0.9380577427821523, "calibration/coverage@5%": 0.2701352530541012, "calibration/distribution_entropy_10": 0.6175718779005612, "calibration/distribution_entropy_100": 0.33658476146835625, "calibration/ece": 0.09195633176333981, "calibration/mean_confidence": 0.7355146774642372, "calibration/unique_confidence_per_question": 0.020312499999999997, "calibration/unique_confidences": 7.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0032986111111111384, "completions/max_length": 3793.8, "completions/max_terminated_length": 3793.8, "completions/mean_length": 903.2882080078125, "completions/mean_terminated_length": 906.3167114257812, "completions/min_length": 0.0, "completions/min_terminated_length": 214.4, "epoch": 0.9254807692307693, "grad_norm": 0.0006492780521512032, "learning_rate": 3.936298076923077e-06, "loss": -0.002, "num_tokens": 892689414.0, "reward": 1.3000523090362548, "reward_std": 0.11470878720283509, "rewards/accuracy_reward": 0.7401041626930237, "rewards/brier_reward": 0.8632847309112549, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9967013955116272, "rewards/mean_confidence_reward": 0.716371500492096, "sampling/batch_mean_priority_error": 0.033226227266539954, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.4416666666666667, "sampling/error_ema_max": 0.16249588131904602, "sampling/error_ema_mean": 0.007798460777848959, "sampling/priority_kl": 0.029999995604157448, "sampling/priority_scale": 0.7147068679099903, "sampling/prob_entropy": 10.278953361511231, "sampling/prob_max": 4.458282419363968e-05, "sampling/prob_min": 1.9145980331813917e-05, "sampling/prompt_draws_max": 5.0, "sampling/prompt_draws_mean": 0.9192000031471252, "sampling/prompt_draws_total": 27576.0, "sampling/seen_fraction": 0.6645333409309387, "sampling/unseen_fraction": 0.33546665906906126, "signal/accuracy_reward/centered_abs_mean": 0.1258029520511627, "signal/accuracy_reward/group_std_mean": 0.16285853683948517, "signal/accuracy_reward/group_zero_std_frac": 0.55, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06290147602558135, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06290147602558135, "signal/advantage_abs_mean": 0.08486108630895614, "signal/advantage_pre_scale_abs_mean": 0.08486108630895614, "signal/advantage_pre_scale_std": 0.16529499590396882, "signal/advantage_std": 0.16529499590396882, "signal/brier_reward/centered_abs_mean": 0.0634469136595726, "signal/brier_reward/group_std_mean": 0.08727488517761231, "signal/brier_reward/group_zero_std_frac": 0.15555555671453475, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0317234568297863, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.0317234568297863, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.00593532978091389, "signal/format_reward/group_std_mean": 0.01148662269115448, "signal/format_reward/group_zero_std_frac": 0.9527777791023254, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.002967664890456945, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.002967664890456945, "signal/mean_confidence_reward/centered_abs_mean": 0.057197265326976776, "signal/mean_confidence_reward/group_std_mean": 0.07460127323865891, "signal/mean_confidence_reward/group_zero_std_frac": 0.17222222536802292, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.719726232200629e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.719726232200629e-07, "step": 385 }, { "calibration/aurc": 0.15444463437730716, "calibration/batch_distribution_entropy": 0.6379871401540195, "calibration/batch_entropy_100bins": 0.35082089900099217, "calibration/batch_entropy_10bins": 0.6379871401540195, "calibration/batch_entropy_50bins": 0.41298068606937866, "calibration/batch_uniqueness": 0.45707237434195686, "calibration/confidence_entropy": 0.47556850046240673, "calibration/coverage@0%": 0.23986092788038701, "calibration/coverage@1%": 0.23986092788038701, "calibration/coverage@10%": 0.34862027264731754, "calibration/coverage@15%": 0.4109842238346526, "calibration/coverage@20%": 0.6173785180299032, "calibration/coverage@25%": 0.735137254589043, "calibration/coverage@30%": 0.8669522849462366, "calibration/coverage@5%": 0.34862027264731754, "calibration/distribution_entropy_10": 0.6379871401540195, "calibration/distribution_entropy_100": 0.35082089900099217, "calibration/ece": 0.14569204028002386, "calibration/mean_confidence": 0.7182665767653985, "calibration/unique_confidence_per_question": 0.020833333333333332, "calibration/unique_confidences": 8.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009114583333333348, "completions/max_length": 3975.0, "completions/max_terminated_length": 3975.0, "completions/mean_length": 965.8712768554688, "completions/mean_terminated_length": 974.8541625976562, "completions/min_length": 0.0, "completions/min_terminated_length": 224.4, "epoch": 0.9375, "grad_norm": 0.0006160195334814489, "learning_rate": 3.90625e-06, "loss": -0.0115, "num_tokens": 906936155.0, "reward": 1.2957585334777832, "reward_std": 0.12280676662921905, "rewards/accuracy_reward": 0.7367187380790711, "rewards/brier_reward": 0.8644192695617676, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9903645753860474, "rewards/mean_confidence_reward": 0.72361980676651, "sampling/batch_mean_priority_error": 0.036464205832859656, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.47777777777777786, "sampling/error_ema_max": 0.16249588131904602, "sampling/error_ema_mean": 0.007832664065063, "sampling/priority_kl": 0.03000004217028618, "sampling/priority_scale": 0.714122933219187, "sampling/prob_entropy": 10.278953552246094, "sampling/prob_max": 4.4722390157403424e-05, "sampling/prob_min": 1.9219180467189288e-05, "sampling/prompt_draws_max": 5.0, "sampling/prompt_draws_mean": 0.9312000036239624, "sampling/prompt_draws_total": 27936.0, "sampling/seen_fraction": 0.6700466632843017, "sampling/unseen_fraction": 0.32995333671569826, "signal/accuracy_reward/centered_abs_mean": 0.11191948801279068, "signal/accuracy_reward/group_std_mean": 0.15588756203651427, "signal/accuracy_reward/group_zero_std_frac": 0.5277777910232544, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05595974400639534, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05595974400639534, "signal/advantage_abs_mean": 0.08563981354236602, "signal/advantage_pre_scale_abs_mean": 0.08563981354236602, "signal/advantage_pre_scale_std": 0.17761370241642, "signal/advantage_std": 0.17761370241642, "signal/brier_reward/centered_abs_mean": 0.07111435532569885, "signal/brier_reward/group_std_mean": 0.098927903175354, "signal/brier_reward/group_zero_std_frac": 0.23888888955116272, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.035557177662849423, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.035557177662849423, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.016693793423473834, "signal/format_reward/group_std_mean": 0.030034196749329566, "signal/format_reward/group_zero_std_frac": 0.8833333373069763, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.008346896711736917, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.008346896711736917, "signal/mean_confidence_reward/centered_abs_mean": 0.06112522184848786, "signal/mean_confidence_reward/group_std_mean": 0.0802394762635231, "signal/mean_confidence_reward/group_zero_std_frac": 0.2694444477558136, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.112521873546938e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.112521873546938e-07, "step": 390 }, { "calibration/aurc": 0.14231824250564923, "calibration/batch_distribution_entropy": 0.5188558679111168, "calibration/batch_entropy_100bins": 0.27990488901705934, "calibration/batch_entropy_10bins": 0.5188558679111168, "calibration/batch_entropy_50bins": 0.3294995065277213, "calibration/batch_uniqueness": 0.2114929455399162, "calibration/confidence_entropy": 0.4412506440080336, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.4840907854656223, "calibration/coverage@15%": 0.6611199057036931, "calibration/coverage@20%": 0.7449531602283396, "calibration/coverage@25%": 0.7973753280839895, "calibration/coverage@30%": 0.9729166666666667, "calibration/coverage@5%": 0.2, "calibration/distribution_entropy_10": 0.5188558679111168, "calibration/distribution_entropy_100": 0.27990488901705934, "calibration/ece": 0.10563489391665462, "calibration/mean_confidence": 0.7899494168157181, "calibration/unique_confidence_per_question": 0.019791666666666666, "calibration/unique_confidences": 7.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012673611111111094, "completions/max_length": 3903.8, "completions/max_terminated_length": 3903.8, "completions/mean_length": 939.616748046875, "completions/mean_terminated_length": 951.5771362304688, "completions/min_length": 0.0, "completions/min_terminated_length": 222.2, "epoch": 0.9495192307692307, "grad_norm": 0.000712635344825685, "learning_rate": 3.876201923076923e-06, "loss": -0.0117, "num_tokens": 920847516.0, "reward": 1.3004497051239015, "reward_std": 0.13428637385368347, "rewards/accuracy_reward": 0.7579861164093018, "rewards/brier_reward": 0.856179702281952, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.98671875, "rewards/mean_confidence_reward": 0.7419097185134887, "sampling/batch_mean_priority_error": 0.04323727973120382, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.4416666666666666, "sampling/error_ema_max": 0.16249588131904602, "sampling/error_ema_mean": 0.007874753326177597, "sampling/priority_kl": 0.030000117421150208, "sampling/priority_scale": 0.7136170566780493, "sampling/prob_entropy": 10.278951835632324, "sampling/prob_max": 4.4862589857075366e-05, "sampling/prob_min": 1.9290969066787512e-05, "sampling/prompt_draws_max": 5.0, "sampling/prompt_draws_mean": 0.9431999921798706, "sampling/prompt_draws_total": 28296.0, "sampling/seen_fraction": 0.6754800081253052, "sampling/unseen_fraction": 0.3245199918746948, "signal/accuracy_reward/centered_abs_mean": 0.12306857705116273, "signal/accuracy_reward/group_std_mean": 0.17122169733047485, "signal/accuracy_reward/group_zero_std_frac": 0.47222223281860354, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06153428852558136, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06153428852558136, "signal/advantage_abs_mean": 0.0938779279589653, "signal/advantage_pre_scale_abs_mean": 0.0938779279589653, "signal/advantage_pre_scale_std": 0.18882446885108947, "signal/advantage_std": 0.18882446885108947, "signal/brier_reward/centered_abs_mean": 0.07441985458135605, "signal/brier_reward/group_std_mean": 0.10555129051208496, "signal/brier_reward/group_zero_std_frac": 0.24166666865348815, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03720992729067803, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03720992729067803, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.016726345475763082, "signal/format_reward/group_std_mean": 0.028362412378191947, "signal/format_reward/group_zero_std_frac": 0.8916666746139527, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.008363172737881541, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.008363172737881541, "signal/mean_confidence_reward/centered_abs_mean": 0.05717773661017418, "signal/mean_confidence_reward/group_std_mean": 0.0777185320854187, "signal/mean_confidence_reward/group_zero_std_frac": 0.2694444417953491, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.717773547075921e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.717773547075921e-07, "step": 395 }, { "calibration/aurc": 0.2103259096874605, "calibration/batch_distribution_entropy": 0.5207171743631474, "calibration/batch_entropy_100bins": 0.3006230264744981, "calibration/batch_entropy_10bins": 0.5207171743631474, "calibration/batch_entropy_50bins": 0.35388856272596264, "calibration/batch_uniqueness": 0.18626633909579246, "calibration/confidence_entropy": 0.45076468811045334, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.1404699738903394, "calibration/coverage@15%": 0.18580330722367275, "calibration/coverage@20%": 0.2562019147084421, "calibration/coverage@25%": 0.7477652478378727, "calibration/coverage@30%": 0.8831667349726777, "calibration/coverage@5%": 0.13577023498694515, "calibration/distribution_entropy_10": 0.5207171743631474, "calibration/distribution_entropy_100": 0.3006230264744981, "calibration/ece": 0.1485258476552534, "calibration/mean_confidence": 0.7193914784120652, "calibration/unique_confidence_per_question": 0.020833333333333332, "calibration/unique_confidences": 8.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01822916666666665, "completions/max_length": 3858.2, "completions/max_terminated_length": 3858.2, "completions/mean_length": 969.39208984375, "completions/mean_terminated_length": 987.3859252929688, "completions/min_length": 0.0, "completions/min_terminated_length": 268.2, "epoch": 0.9615384615384616, "grad_norm": 0.0005787439295090735, "learning_rate": 3.846153846153847e-06, "loss": -0.0206, "num_tokens": 935088481.0, "reward": 1.2711968421936035, "reward_std": 0.13464723378419877, "rewards/accuracy_reward": 0.7215277791023255, "rewards/brier_reward": 0.8390807509422302, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9817708373069763, "rewards/mean_confidence_reward": 0.7178385496139527, "sampling/batch_mean_priority_error": 0.051486737806012664, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.44722222222222224, "sampling/error_ema_max": 0.16249588131904602, "sampling/error_ema_mean": 0.007927501015365124, "sampling/priority_kl": 0.029999881982803345, "sampling/priority_scale": 0.7131928384071216, "sampling/prob_entropy": 10.278952407836915, "sampling/prob_max": 4.5004417916061354e-05, "sampling/prob_min": 1.9361668091733008e-05, "sampling/prompt_draws_max": 5.0, "sampling/prompt_draws_mean": 0.9551999926567077, "sampling/prompt_draws_total": 28656.0, "sampling/seen_fraction": 0.6808599948883056, "sampling/unseen_fraction": 0.3191400051116943, "signal/accuracy_reward/centered_abs_mean": 0.12125650942325591, "signal/accuracy_reward/group_std_mean": 0.1649042248725891, "signal/accuracy_reward/group_zero_std_frac": 0.5138888895511627, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06062825471162796, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06062825471162796, "signal/advantage_abs_mean": 0.09647919833660126, "signal/advantage_pre_scale_abs_mean": 0.09647919833660126, "signal/advantage_pre_scale_std": 0.19579894542694093, "signal/advantage_std": 0.19579894542694093, "signal/brier_reward/centered_abs_mean": 0.08560052514076233, "signal/brier_reward/group_std_mean": 0.11464979350566865, "signal/brier_reward/group_zero_std_frac": 0.2750000059604645, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.042800262570381165, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.042800262570381165, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.02554253526031971, "signal/format_reward/group_std_mean": 0.039585836976766584, "signal/format_reward/group_zero_std_frac": 0.8611111044883728, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.012771267630159854, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.012771267630159854, "signal/mean_confidence_reward/centered_abs_mean": 0.06342068463563919, "signal/mean_confidence_reward/group_std_mean": 0.08286617994308472, "signal/mean_confidence_reward/group_zero_std_frac": 0.3166666775941849, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.342068104459031e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.342068104459031e-07, "step": 400 }, { "epoch": 0.9615384615384616, "eval_calibration/aurc": 0.1372654401238463, "eval_calibration/batch_distribution_entropy": 0.6162748041187558, "eval_calibration/batch_entropy_100bins": 0.35525026458055475, "eval_calibration/batch_entropy_10bins": 0.6162748041187558, "eval_calibration/batch_entropy_50bins": 0.4181948635631051, "eval_calibration/batch_uniqueness": 0.3992356427296172, "eval_calibration/confidence_entropy": 0.4599076584380884, "eval_calibration/coverage@0%": 0.0, "eval_calibration/coverage@1%": 0.0, "eval_calibration/coverage@10%": 0.5528169014084507, "eval_calibration/coverage@15%": 0.6575704225352113, "eval_calibration/coverage@20%": 0.8107394366197183, "eval_calibration/coverage@25%": 0.8802816901408451, "eval_calibration/coverage@30%": 1.0, "eval_calibration/coverage@5%": 0.0, "eval_calibration/distribution_entropy_10": 0.6162748041187558, "eval_calibration/distribution_entropy_100": 0.35525026458055475, "eval_calibration/ece": 0.036355633802816784, "eval_calibration/mean_confidence": 0.6766725352112677, "eval_calibration/unique_confidence_per_question": 0.0078125, "eval_calibration/unique_confidences": 9, "eval_completions/clipped_ratio": 0.013888888888888914, "eval_completions/max_length": 3091.8333333333335, "eval_completions/max_terminated_length": 3091.8333333333335, "eval_completions/mean_length": 965.8703206380209, "eval_completions/mean_terminated_length": 979.3125712076823, "eval_completions/min_length": 72.16666666666667, "eval_completions/min_terminated_length": 301.1666666666667, "eval_loss": 0.0, "eval_num_tokens": 935088481.0, "eval_reward": 1.2657662431399028, "eval_reward_std": 0.3163274774948756, "eval_rewards/accuracy_reward": 0.703125, "eval_rewards/brier_reward": 0.8422830005486807, "eval_rewards/confidence_one_or_zero": 0.0, "eval_rewards/format_reward": 0.9861111243565878, "eval_rewards/mean_confidence_reward": 0.667274276415507, "eval_runtime": 214.8399, "eval_samples_per_second": 4.655, "eval_signal/accuracy_reward/centered_abs_mean": 0.4060329844554265, "eval_signal/accuracy_reward/group_std_mean": 0.456397940715154, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.20301649222771326, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.20301649222771326, "eval_signal/advantage_abs_mean": 0.26114028692245483, "eval_signal/advantage_pre_scale_abs_mean": 0.26114028692245483, "eval_signal/advantage_pre_scale_std": 0.31408119946718216, "eval_signal/advantage_std": 0.31408119946718216, "eval_signal/brier_reward/centered_abs_mean": 0.17281847819685936, "eval_signal/brier_reward/group_std_mean": 0.23123475164175034, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.08640923909842968, "eval_signal/brier_reward/weight": 0.5, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.08640923909842968, "eval_signal/confidence_one_or_zero/centered_abs_mean": 0.0, "eval_signal/confidence_one_or_zero/group_std_mean": 0.0, "eval_signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "eval_signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "eval_signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/centered_abs_mean": 0.026692708022892475, "eval_signal/format_reward/group_std_mean": 0.07258860456446807, "eval_signal/format_reward/group_zero_std_frac": 0.6111111268401146, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.013346354011446238, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.013346354011446238, "eval_signal/mean_confidence_reward/centered_abs_mean": 0.24405381083488464, "eval_signal/mean_confidence_reward/group_std_mean": 0.278071865439415, "eval_signal/mean_confidence_reward/group_zero_std_frac": 0.0, "eval_signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 2.4405380069462503e-06, "eval_signal/mean_confidence_reward/weight": 9.999999747378752e-06, "eval_signal/mean_confidence_reward/weighted_centered_abs_mean": 2.4405380069462503e-06, "eval_steps_per_second": 0.028, "step": 400 }, { "epoch": 0.9615384615384616, "step": 400, "train_probe_calibration/aurc": 0.09306832002660875, "train_probe_calibration/batch_distribution_entropy": 0.6328398216611376, "train_probe_calibration/batch_entropy_100bins": 0.3528405852438377, "train_probe_calibration/batch_entropy_10bins": 0.6328398216611376, "train_probe_calibration/batch_entropy_50bins": 0.4153582280362068, "train_probe_calibration/batch_uniqueness": 0.39234035658169547, "train_probe_calibration/confidence_entropy": 0.4567299333781084, "train_probe_calibration/coverage@0%": 0.0, "train_probe_calibration/coverage@1%": 0.0, "train_probe_calibration/coverage@10%": 0.6769638128861429, "train_probe_calibration/coverage@15%": 0.8420123565754634, "train_probe_calibration/coverage@20%": 0.8861429832303619, "train_probe_calibration/coverage@25%": 1.0, "train_probe_calibration/coverage@30%": 1.0, "train_probe_calibration/coverage@5%": 0.0, "train_probe_calibration/distribution_entropy_10": 0.6328398216611376, "train_probe_calibration/distribution_entropy_100": 0.3528405852438377, "train_probe_calibration/ece": 0.07458075904677838, "train_probe_calibration/mean_confidence": 0.6906443071491616, "train_probe_calibration/unique_confidence_per_question": 0.0078125, "train_probe_calibration/unique_confidences": 9, "train_probe_completions/clipped_ratio": 0.02309027777777778, "train_probe_completions/max_length": 3173.0, "train_probe_completions/max_terminated_length": 3173.0, "train_probe_completions/mean_length": 945.7347412109375, "train_probe_completions/mean_terminated_length": 967.8654988606771, "train_probe_completions/min_length": 37.5, "train_probe_completions/min_terminated_length": 231.16666666666666, "train_probe_loss": 0.0, "train_probe_num_tokens": 935088481.0, "train_probe_reward": 1.297945221265157, "train_probe_reward_std": 0.2997801552216212, "train_probe_rewards/accuracy_reward": 0.7526041567325592, "train_probe_rewards/brier_reward": 0.8597656190395355, "train_probe_rewards/confidence_one_or_zero": 0.0, "train_probe_rewards/format_reward": 0.9835069378217062, "train_probe_rewards/mean_confidence_reward": 0.6792534788449606, "train_probe_runtime": 214.6557, "train_probe_samples_per_second": 4.659, "train_probe_signal/accuracy_reward/centered_abs_mean": 0.3627929737170537, "train_probe_signal/accuracy_reward/group_std_mean": 0.43074525396029156, "train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0, "train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.18139648685852686, "train_probe_signal/accuracy_reward/weight": 0.5, "train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.18139648685852686, "train_probe_signal/advantage_abs_mean": 0.23724218209584555, "train_probe_signal/advantage_pre_scale_abs_mean": 0.23724218209584555, "train_probe_signal/advantage_pre_scale_std": 0.3022302836179733, "train_probe_signal/advantage_std": 0.3022302836179733, "train_probe_signal/brier_reward/centered_abs_mean": 0.15757650882005692, "train_probe_signal/brier_reward/group_std_mean": 0.2167892505725225, "train_probe_signal/brier_reward/group_zero_std_frac": 0.0, "train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.07878825441002846, "train_probe_signal/brier_reward/weight": 0.5, "train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.07878825441002846, "train_probe_signal/confidence_one_or_zero/centered_abs_mean": 0.0, "train_probe_signal/confidence_one_or_zero/group_std_mean": 0.0, "train_probe_signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "train_probe_signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "train_probe_signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "train_probe_signal/format_reward/centered_abs_mean": 0.031087239272892475, "train_probe_signal/format_reward/group_std_mean": 0.07184661490221818, "train_probe_signal/format_reward/group_zero_std_frac": 0.6666666840513548, "train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.015543619636446238, "train_probe_signal/format_reward/weight": 0.5, "train_probe_signal/format_reward/weighted_centered_abs_mean": 0.015543619636446238, "train_probe_signal/mean_confidence_reward/centered_abs_mean": 0.2371364881594976, "train_probe_signal/mean_confidence_reward/group_std_mean": 0.27242950598398846, "train_probe_signal/mean_confidence_reward/group_zero_std_frac": 0.0, "train_probe_signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 2.371364795787182e-06, "train_probe_signal/mean_confidence_reward/weight": 9.999999747378752e-06, "train_probe_signal/mean_confidence_reward/weighted_centered_abs_mean": 2.371364795787182e-06, "train_probe_steps_per_second": 0.028 }, { "calibration/aurc": 0.09160201940281179, "calibration/batch_distribution_entropy": 0.5302120579669701, "calibration/batch_entropy_100bins": 0.29462174083381854, "calibration/batch_entropy_10bins": 0.5302120579669701, "calibration/batch_entropy_50bins": 0.3468239463697428, "calibration/batch_uniqueness": 0.19472494727986614, "calibration/confidence_entropy": 0.4459124291933553, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.5100920743816573, "calibration/coverage@15%": 0.877302995137047, "calibration/coverage@20%": 0.9473958333333334, "calibration/coverage@25%": 0.9713541666666666, "calibration/coverage@30%": 0.9833333333333334, "calibration/coverage@5%": 0.4398577063679068, "calibration/distribution_entropy_10": 0.5302120579669701, "calibration/distribution_entropy_100": 0.29462174083381854, "calibration/ece": 0.14295423778611177, "calibration/mean_confidence": 0.7306168172362433, "calibration/unique_confidence_per_question": 0.021875, "calibration/unique_confidences": 8.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012586805555555558, "completions/max_length": 3763.4, "completions/max_terminated_length": 3763.4, "completions/mean_length": 998.8343017578125, "completions/mean_terminated_length": 1011.7352783203125, "completions/min_length": 0.0, "completions/min_terminated_length": 254.2, "epoch": 0.9735576923076923, "grad_norm": 0.0004764749901369214, "learning_rate": 3.81610576923077e-06, "loss": -0.0115, "num_tokens": 949714508.0, "reward": 1.2655861377716064, "reward_std": 0.12250902205705642, "rewards/accuracy_reward": 0.7224826335906982, "rewards/brier_reward": 0.8213498592376709, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9873263835906982, "rewards/mean_confidence_reward": 0.668897545337677, "sampling/batch_mean_priority_error": 0.05678419767544698, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.4083333333333333, "sampling/error_ema_max": 0.16249588131904602, "sampling/error_ema_mean": 0.00798400416970253, "sampling/priority_kl": 0.02999998740851879, "sampling/priority_scale": 0.7126380980247632, "sampling/prob_entropy": 10.278954887390137, "sampling/prob_max": 4.5139902067603546e-05, "sampling/prob_min": 1.943270181072876e-05, "sampling/prompt_draws_max": 5.0, "sampling/prompt_draws_mean": 0.9672000050544739, "sampling/prompt_draws_total": 29016.0, "sampling/seen_fraction": 0.6859200119972229, "sampling/unseen_fraction": 0.3140799880027771, "signal/accuracy_reward/centered_abs_mean": 0.12692600041627883, "signal/accuracy_reward/group_std_mean": 0.17087701857089996, "signal/accuracy_reward/group_zero_std_frac": 0.4972222328186035, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06346300020813941, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06346300020813941, "signal/advantage_abs_mean": 0.08730639815330506, "signal/advantage_pre_scale_abs_mean": 0.08730639815330506, "signal/advantage_pre_scale_std": 0.1753060758113861, "signal/advantage_std": 0.1753060758113861, "signal/brier_reward/centered_abs_mean": 0.08770604431629181, "signal/brier_reward/group_std_mean": 0.11753087639808654, "signal/brier_reward/group_zero_std_frac": 0.24166666865348815, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.043853022158145905, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.043853022158145905, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.01763237863779068, "signal/format_reward/group_std_mean": 0.028114334866404533, "signal/format_reward/group_zero_std_frac": 0.9000000119209289, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00881618931889534, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00881618931889534, "signal/mean_confidence_reward/centered_abs_mean": 0.06661078855395317, "signal/mean_confidence_reward/group_std_mean": 0.08901985883712768, "signal/mean_confidence_reward/group_zero_std_frac": 0.2722222298383713, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.661078714387258e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.661078714387258e-07, "step": 405 }, { "calibration/aurc": 0.146472110841029, "calibration/batch_distribution_entropy": 0.5287236575059514, "calibration/batch_entropy_100bins": 0.2992488069162606, "calibration/batch_entropy_10bins": 0.5287236575059514, "calibration/batch_entropy_50bins": 0.35227085369669153, "calibration/batch_uniqueness": 0.20660984918907693, "calibration/confidence_entropy": 0.4404203378198561, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.43997464813960063, "calibration/coverage@15%": 0.47216462175437623, "calibration/coverage@20%": 0.6504767488936355, "calibration/coverage@25%": 0.8245111648494333, "calibration/coverage@30%": 0.9152815013404826, "calibration/coverage@5%": 0.3947077991416191, "calibration/distribution_entropy_10": 0.5287236575059514, "calibration/distribution_entropy_100": 0.2992488069162606, "calibration/ece": 0.11603427993313538, "calibration/mean_confidence": 0.7137951490105662, "calibration/unique_confidence_per_question": 0.021875, "calibration/unique_confidences": 8.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011284722222222232, "completions/max_length": 3923.0, "completions/max_terminated_length": 3923.0, "completions/mean_length": 979.064501953125, "completions/mean_terminated_length": 990.3482177734375, "completions/min_length": 0.0, "completions/min_terminated_length": 216.6, "epoch": 0.9855769230769231, "grad_norm": 0.0004954978358000517, "learning_rate": 3.7860576923076927e-06, "loss": -0.0151, "num_tokens": 964046835.0, "reward": 1.274612021446228, "reward_std": 0.1268946871161461, "rewards/accuracy_reward": 0.7220486164093017, "rewards/brier_reward": 0.8384461641311646, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9887152791023255, "rewards/mean_confidence_reward": 0.7021701455116272, "sampling/batch_mean_priority_error": 0.052499148894268544, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.43888888888888894, "sampling/error_ema_max": 0.16249588131904602, "sampling/error_ema_mean": 0.008041384257376194, "sampling/priority_kl": 0.02999994605779648, "sampling/priority_scale": 0.7123283087974415, "sampling/prob_entropy": 10.278951454162598, "sampling/prob_max": 4.5282146311365067e-05, "sampling/prob_min": 1.950107980519533e-05, "sampling/prompt_draws_max": 5.0, "sampling/prompt_draws_mean": 0.979200005531311, "sampling/prompt_draws_total": 29376.0, "sampling/seen_fraction": 0.6911133289337158, "sampling/unseen_fraction": 0.3088866710662842, "signal/accuracy_reward/centered_abs_mean": 0.12586805671453477, "signal/accuracy_reward/group_std_mean": 0.16451444029808043, "signal/accuracy_reward/group_zero_std_frac": 0.5388888835906982, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06293402835726739, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06293402835726739, "signal/advantage_abs_mean": 0.09020028412342071, "signal/advantage_pre_scale_abs_mean": 0.09020028412342071, "signal/advantage_pre_scale_std": 0.18385487496852876, "signal/advantage_std": 0.18385487496852876, "signal/brier_reward/centered_abs_mean": 0.08396928757429123, "signal/brier_reward/group_std_mean": 0.11364553421735764, "signal/brier_reward/group_zero_std_frac": 0.347222226858139, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.041984643787145615, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.041984643787145615, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.019259982742369175, "signal/format_reward/group_std_mean": 0.03404767997562885, "signal/format_reward/group_zero_std_frac": 0.8694444417953491, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.009629991371184587, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.009629991371184587, "signal/mean_confidence_reward/centered_abs_mean": 0.06446019485592842, "signal/mean_confidence_reward/group_std_mean": 0.08440773785114289, "signal/mean_confidence_reward/group_zero_std_frac": 0.39166666865348815, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.446019142458681e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.446019142458681e-07, "step": 410 }, { "calibration/aurc": 0.20424447506264923, "calibration/batch_distribution_entropy": 0.5956056427737836, "calibration/batch_entropy_100bins": 0.34593084307848915, "calibration/batch_entropy_10bins": 0.5956056427737836, "calibration/batch_entropy_50bins": 0.40722419135785, "calibration/batch_uniqueness": 0.4067028193692369, "calibration/confidence_entropy": 0.4794688579045291, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.08421052631578947, "calibration/coverage@10%": 0.3074001933968781, "calibration/coverage@15%": 0.45175714877745543, "calibration/coverage@20%": 0.5347810471059539, "calibration/coverage@25%": 0.5909490261085786, "calibration/coverage@30%": 0.8287976153474237, "calibration/coverage@5%": 0.0968421052631579, "calibration/distribution_entropy_10": 0.5956056427737836, "calibration/distribution_entropy_100": 0.34593084307848915, "calibration/ece": 0.12277091535433071, "calibration/mean_confidence": 0.6573581252590137, "calibration/unique_confidence_per_question": 0.022916666666666665, "calibration/unique_confidences": 8.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00859375, "completions/max_length": 3644.8, "completions/max_terminated_length": 3644.8, "completions/mean_length": 1015.5580810546875, "completions/mean_terminated_length": 1024.4833251953125, "completions/min_length": 0.0, "completions/min_terminated_length": 256.4, "epoch": 0.9975961538461539, "grad_norm": 0.0004494949243962765, "learning_rate": 3.756009615384616e-06, "loss": -0.0094, "num_tokens": 978866064.0, "reward": 1.2763642072677612, "reward_std": 0.11199232935905457, "rewards/accuracy_reward": 0.70625, "rewards/brier_reward": 0.8552326440811158, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9912326335906982, "rewards/mean_confidence_reward": 0.6571180343627929, "sampling/batch_mean_priority_error": 0.04054459026418521, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.43611111111111106, "sampling/error_ema_max": 0.16249588131904602, "sampling/error_ema_mean": 0.008088457770645619, "sampling/priority_kl": 0.029999810457229614, "sampling/priority_scale": 0.7122232496039942, "sampling/prob_entropy": 10.278951263427734, "sampling/prob_max": 4.542987808235921e-05, "sampling/prob_min": 1.956713203981053e-05, "sampling/prompt_draws_max": 5.0, "sampling/prompt_draws_mean": 0.9911999940872193, "sampling/prompt_draws_total": 29736.0, "sampling/seen_fraction": 0.6963999986648559, "sampling/unseen_fraction": 0.30360000133514403, "signal/accuracy_reward/centered_abs_mean": 0.12038845419883729, "signal/accuracy_reward/group_std_mean": 0.1608419895172119, "signal/accuracy_reward/group_zero_std_frac": 0.5333333432674408, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06019422709941864, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06019422709941864, "signal/advantage_abs_mean": 0.07947031259536744, "signal/advantage_pre_scale_abs_mean": 0.07947031259536744, "signal/advantage_pre_scale_std": 0.1640724152326584, "signal/advantage_std": 0.1640724152326584, "signal/brier_reward/centered_abs_mean": 0.07228801995515824, "signal/brier_reward/group_std_mean": 0.10005511939525605, "signal/brier_reward/group_zero_std_frac": 0.26111111640930174, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03614400997757912, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03614400997757912, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.014339192397892475, "signal/format_reward/group_std_mean": 0.0262321874499321, "signal/format_reward/group_zero_std_frac": 0.8944444537162781, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.007169596198946237, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.007169596198946237, "signal/mean_confidence_reward/centered_abs_mean": 0.061816409975290296, "signal/mean_confidence_reward/group_std_mean": 0.08097582757472992, "signal/mean_confidence_reward/group_zero_std_frac": 0.28333333432674407, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.181640742397576e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.181640742397576e-07, "step": 415 }, { "calibration/aurc": 0.15625822134062678, "calibration/batch_distribution_entropy": 0.5187429287304861, "calibration/batch_entropy_100bins": 0.29191831446718414, "calibration/batch_entropy_10bins": 0.5187429287304861, "calibration/batch_entropy_50bins": 0.3436415165920129, "calibration/batch_uniqueness": 0.17664039554168148, "calibration/confidence_entropy": 0.454614391014447, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.05511811023622047, "calibration/coverage@10%": 0.3866551837270341, "calibration/coverage@15%": 0.5913426837270341, "calibration/coverage@20%": 0.7276694381129991, "calibration/coverage@25%": 0.7998906962287609, "calibration/coverage@30%": 0.8527614311368975, "calibration/coverage@5%": 0.05931758530183727, "calibration/distribution_entropy_10": 0.5187429287304861, "calibration/distribution_entropy_100": 0.29191831446718414, "calibration/ece": 0.07153017077635034, "calibration/mean_confidence": 0.7071844047865727, "calibration/unique_confidence_per_question": 0.021354166666666664, "calibration/unique_confidences": 8.2, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.002430555555555558, "completions/max_length": 3184.6, "completions/max_terminated_length": 3184.6, "completions/mean_length": 974.892626953125, "completions/mean_terminated_length": 977.2836303710938, "completions/min_length": 86.6, "completions/min_terminated_length": 263.4, "epoch": 1.0096153846153846, "grad_norm": 0.00075355643639341, "learning_rate": 3.725961538461539e-06, "loss": -0.0027, "num_tokens": 993413498.0, "reward": 1.267124342918396, "reward_std": 0.12146431356668472, "rewards/accuracy_reward": 0.6927083373069763, "rewards/brier_reward": 0.8456927061080932, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9958333253860474, "rewards/mean_confidence_reward": 0.7187413096427917, "sampling/batch_mean_priority_error": 0.05120892290939198, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.4083333333333334, "sampling/error_ema_max": 0.19449620842933654, "sampling/error_ema_mean": 0.008134695142507553, "sampling/priority_kl": 0.030000004172325134, "sampling/priority_scale": 0.7118521988624706, "sampling/prob_entropy": 10.278953361511231, "sampling/prob_max": 4.5568116183858366e-05, "sampling/prob_min": 1.963528666237835e-05, "sampling/prompt_draws_max": 5.0, "sampling/prompt_draws_mean": 1.0032000064849853, "sampling/prompt_draws_total": 30096.0, "sampling/seen_fraction": 0.701253342628479, "sampling/unseen_fraction": 0.298746657371521, "signal/accuracy_reward/centered_abs_mean": 0.12330729216337204, "signal/accuracy_reward/group_std_mean": 0.16879068613052367, "signal/accuracy_reward/group_zero_std_frac": 0.4972222328186035, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06165364608168602, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06165364608168602, "signal/advantage_abs_mean": 0.08633148670196533, "signal/advantage_pre_scale_abs_mean": 0.08633148670196533, "signal/advantage_pre_scale_std": 0.17180612981319426, "signal/advantage_std": 0.17180612981319426, "signal/brier_reward/centered_abs_mean": 0.07244243398308754, "signal/brier_reward/group_std_mean": 0.10006694793701172, "signal/brier_reward/group_zero_std_frac": 0.3, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03622121699154377, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03622121699154377, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.00702039924217388, "signal/format_reward/group_std_mean": 0.013451122678816319, "signal/format_reward/group_zero_std_frac": 0.9416666865348816, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00351019962108694, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00351019962108694, "signal/mean_confidence_reward/centered_abs_mean": 0.05719347670674324, "signal/mean_confidence_reward/group_std_mean": 0.07559116035699845, "signal/mean_confidence_reward/group_zero_std_frac": 0.3555555582046509, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.71934742765734e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.71934742765734e-07, "step": 420 }, { "calibration/aurc": 0.06865467727784927, "calibration/batch_distribution_entropy": 0.4543795898254017, "calibration/batch_entropy_100bins": 0.24974455856950017, "calibration/batch_entropy_10bins": 0.4543795898254017, "calibration/batch_entropy_50bins": 0.29399525351491274, "calibration/batch_uniqueness": 0.013456829224856321, "calibration/confidence_entropy": 0.4199663795097931, "calibration/coverage@0%": 0.15, "calibration/coverage@1%": 0.15, "calibration/coverage@10%": 0.6710055774278215, "calibration/coverage@15%": 0.9165231299212598, "calibration/coverage@20%": 0.9682291666666666, "calibration/coverage@25%": 1.0, "calibration/coverage@30%": 1.0, "calibration/coverage@5%": 0.5911991469816273, "calibration/distribution_entropy_10": 0.4543795898254017, "calibration/distribution_entropy_100": 0.24974455856950017, "calibration/ece": 0.086389435695538, "calibration/mean_confidence": 0.7852444225721785, "calibration/unique_confidence_per_question": 0.0203125, "calibration/unique_confidences": 7.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.001736111111111116, "completions/max_length": 3634.6, "completions/max_terminated_length": 3634.6, "completions/mean_length": 960.628564453125, "completions/mean_terminated_length": 962.2773803710937, "completions/min_length": 0.0, "completions/min_terminated_length": 240.6, "epoch": 1.0216346153846154, "grad_norm": 0.0008929867180995643, "learning_rate": 3.695913461538462e-06, "loss": -0.0011, "num_tokens": 1007578723.0, "reward": 1.2825724124908446, "reward_std": 0.12003176510334015, "rewards/accuracy_reward": 0.7144097208976745, "rewards/brier_reward": 0.8527161598205566, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9980034828186035, "rewards/mean_confidence_reward": 0.7765104055404664, "sampling/batch_mean_priority_error": 0.05508253028193806, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.4333333333333333, "sampling/error_ema_max": 0.20249629020690918, "sampling/error_ema_mean": 0.008189113438129425, "sampling/priority_kl": 0.030000052973628046, "sampling/priority_scale": 0.7119282067054883, "sampling/prob_entropy": 10.278951263427734, "sampling/prob_max": 4.572009711409919e-05, "sampling/prob_min": 1.9699002950801515e-05, "sampling/prompt_draws_max": 5.0, "sampling/prompt_draws_mean": 1.0152000188827515, "sampling/prompt_draws_total": 30456.0, "sampling/seen_fraction": 0.7064599990844727, "sampling/unseen_fraction": 0.29354000091552734, "signal/accuracy_reward/centered_abs_mean": 0.11453992873430252, "signal/accuracy_reward/group_std_mean": 0.1561146765947342, "signal/accuracy_reward/group_zero_std_frac": 0.5333333432674408, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05726996436715126, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05726996436715126, "signal/advantage_abs_mean": 0.08596721366047859, "signal/advantage_pre_scale_abs_mean": 0.08596721366047859, "signal/advantage_pre_scale_std": 0.17472383975982667, "signal/advantage_std": 0.17472383975982667, "signal/brier_reward/centered_abs_mean": 0.06893061101436615, "signal/brier_reward/group_std_mean": 0.0971571832895279, "signal/brier_reward/group_zero_std_frac": 0.3444444537162781, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.034465305507183075, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.034465305507183075, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.0037272135145030917, "signal/format_reward/group_std_mean": 0.008416965417563916, "signal/format_reward/group_zero_std_frac": 0.9611111164093018, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0018636067572515458, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0018636067572515458, "signal/mean_confidence_reward/centered_abs_mean": 0.04610461071133613, "signal/mean_confidence_reward/group_std_mean": 0.06151590570807457, "signal/mean_confidence_reward/group_zero_std_frac": 0.39444445371627807, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 4.6104609623398575e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 4.6104609623398575e-07, "step": 425 }, { "calibration/aurc": 0.16252197159696252, "calibration/batch_distribution_entropy": 0.551132185909566, "calibration/batch_entropy_100bins": 0.3044312413497231, "calibration/batch_entropy_10bins": 0.551132185909566, "calibration/batch_entropy_50bins": 0.358371531660677, "calibration/batch_uniqueness": 0.25293511284722225, "calibration/confidence_entropy": 0.45229610428378975, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.12291666666666667, "calibration/coverage@10%": 0.2604166666666667, "calibration/coverage@15%": 0.4239583333333334, "calibration/coverage@20%": 0.6843750000000001, "calibration/coverage@25%": 0.8161458333333333, "calibration/coverage@30%": 0.8630208333333332, "calibration/coverage@5%": 0.2453125, "calibration/distribution_entropy_10": 0.551132185909566, "calibration/distribution_entropy_100": 0.3044312413497231, "calibration/ece": 0.15208333333333335, "calibration/mean_confidence": 0.7494791666666668, "calibration/unique_confidence_per_question": 0.020312499999999997, "calibration/unique_confidences": 7.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.001041666666666674, "completions/max_length": 3636.6, "completions/max_terminated_length": 3636.6, "completions/mean_length": 972.2225708007812, "completions/mean_terminated_length": 973.2240600585938, "completions/min_length": 0.0, "completions/min_terminated_length": 238.0, "epoch": 1.0336538461538463, "grad_norm": 0.0007758528809063137, "learning_rate": 3.665865384615385e-06, "loss": -0.0007, "num_tokens": 1021881479.0, "reward": 1.2820632934570313, "reward_std": 0.1231762483716011, "rewards/accuracy_reward": 0.7171875, "rewards/brier_reward": 0.8480520844459534, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9988715171813964, "rewards/mean_confidence_reward": 0.7767274141311645, "sampling/batch_mean_priority_error": 0.052858080641298974, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.4138888888888889, "sampling/error_ema_max": 0.20249629020690918, "sampling/error_ema_mean": 0.008248506300151347, "sampling/priority_kl": 0.029999766498804092, "sampling/priority_scale": 0.7118548452155664, "sampling/prob_entropy": 10.27895221710205, "sampling/prob_max": 4.586404174915515e-05, "sampling/prob_min": 1.9762736701522954e-05, "sampling/prompt_draws_max": 5.0, "sampling/prompt_draws_mean": 1.0271999835968018, "sampling/prompt_draws_total": 30816.0, "sampling/seen_fraction": 0.7113333344459534, "sampling/unseen_fraction": 0.28866666555404663, "signal/accuracy_reward/centered_abs_mean": 0.12096354067325592, "signal/accuracy_reward/group_std_mean": 0.1624488949775696, "signal/accuracy_reward/group_zero_std_frac": 0.5250000178813934, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06048177033662796, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06048177033662796, "signal/advantage_abs_mean": 0.08952582180500031, "signal/advantage_pre_scale_abs_mean": 0.08952582180500031, "signal/advantage_pre_scale_std": 0.1779074639081955, "signal/advantage_std": 0.1779074639081955, "signal/brier_reward/centered_abs_mean": 0.0699217140674591, "signal/brier_reward/group_std_mean": 0.09810963273048401, "signal/brier_reward/group_zero_std_frac": 0.32777778506278993, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03496085703372955, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03496085703372955, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.0021538628148846327, "signal/format_reward/group_std_mean": 0.005733087472617626, "signal/format_reward/group_zero_std_frac": 0.9694444417953492, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0010769314074423164, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0010769314074423164, "signal/mean_confidence_reward/centered_abs_mean": 0.0436925008893013, "signal/mean_confidence_reward/group_std_mean": 0.05805739238858223, "signal/mean_confidence_reward/group_zero_std_frac": 0.40277777910232543, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 4.369250177660433e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 4.369250177660433e-07, "step": 430 }, { "calibration/aurc": 0.1326655917417161, "calibration/batch_distribution_entropy": 0.4668999632133756, "calibration/batch_entropy_100bins": 0.25158488405771856, "calibration/batch_entropy_10bins": 0.4668999632133756, "calibration/batch_entropy_50bins": 0.29616165490342655, "calibration/batch_uniqueness": 0.03741925538473879, "calibration/confidence_entropy": 0.4208925983815625, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.15287958115183248, "calibration/coverage@10%": 0.25787185761349496, "calibration/coverage@15%": 0.7178876734948214, "calibration/coverage@20%": 0.8331021540469974, "calibration/coverage@25%": 0.9232851936466492, "calibration/coverage@30%": 0.9566579634464751, "calibration/coverage@5%": 0.1649214659685864, "calibration/distribution_entropy_10": 0.4668999632133756, "calibration/distribution_entropy_100": 0.25158488405771856, "calibration/ece": 0.11240595837377372, "calibration/mean_confidence": 0.7994348054477146, "calibration/unique_confidence_per_question": 0.01927083333333333, "calibration/unique_confidences": 7.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00199652777777779, "completions/max_length": 3659.0, "completions/max_terminated_length": 3659.0, "completions/mean_length": 985.06005859375, "completions/mean_terminated_length": 987.0644653320312, "completions/min_length": 56.6, "completions/min_terminated_length": 256.8, "epoch": 1.0456730769230769, "grad_norm": 0.0006942002801224589, "learning_rate": 3.635817307692308e-06, "loss": -0.0008, "num_tokens": 1036339675.0, "reward": 1.2937331438064574, "reward_std": 0.1319456309080124, "rewards/accuracy_reward": 0.7370659828186035, "rewards/brier_reward": 0.8524679064750671, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9979166746139526, "rewards/mean_confidence_reward": 0.7897829651832581, "sampling/batch_mean_priority_error": 0.0520680581107638, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.4, "sampling/error_ema_max": 0.20249629020690918, "sampling/error_ema_mean": 0.008302754536271096, "sampling/priority_kl": 0.030000053718686102, "sampling/priority_scale": 0.7120462120277807, "sampling/prob_entropy": 10.278950881958007, "sampling/prob_max": 4.6015283442102375e-05, "sampling/prob_min": 1.9823418188025244e-05, "sampling/prompt_draws_max": 5.0, "sampling/prompt_draws_mean": 1.039199995994568, "sampling/prompt_draws_total": 31176.0, "sampling/seen_fraction": 0.7163466691970826, "sampling/unseen_fraction": 0.2836533308029175, "signal/accuracy_reward/centered_abs_mean": 0.13167860358953476, "signal/accuracy_reward/group_std_mean": 0.1694795623421669, "signal/accuracy_reward/group_zero_std_frac": 0.5277777910232544, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06583930179476738, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06583930179476738, "signal/advantage_abs_mean": 0.10044280737638474, "signal/advantage_pre_scale_abs_mean": 0.10044280737638474, "signal/advantage_pre_scale_std": 0.18933759331703187, "signal/advantage_std": 0.18933759331703187, "signal/brier_reward/centered_abs_mean": 0.0774193748831749, "signal/brier_reward/group_std_mean": 0.10275954306125641, "signal/brier_reward/group_zero_std_frac": 0.31666666865348814, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03870968744158745, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03870968744158745, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.0038411458488553764, "signal/format_reward/group_std_mean": 0.008209389261901378, "signal/format_reward/group_zero_std_frac": 0.9638888955116272, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0019205729244276882, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0019205729244276882, "signal/mean_confidence_reward/centered_abs_mean": 0.04758844077587128, "signal/mean_confidence_reward/group_std_mean": 0.06242263168096542, "signal/mean_confidence_reward/group_zero_std_frac": 0.36111111640930177, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 4.758843715535477e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 4.758843715535477e-07, "step": 435 }, { "calibration/aurc": 0.11291767152922454, "calibration/batch_distribution_entropy": 0.5680532576297793, "calibration/batch_entropy_100bins": 0.3125931234346516, "calibration/batch_entropy_10bins": 0.5680532576297793, "calibration/batch_entropy_50bins": 0.3679795671929091, "calibration/batch_uniqueness": 0.3136267302700734, "calibration/confidence_entropy": 0.4582802544004433, "calibration/coverage@0%": 0.1375, "calibration/coverage@1%": 0.4014249244733641, "calibration/coverage@10%": 0.5410640270506109, "calibration/coverage@15%": 0.5890625, "calibration/coverage@20%": 0.709375, "calibration/coverage@25%": 0.8984375, "calibration/coverage@30%": 0.9291666666666668, "calibration/coverage@5%": 0.49184936736474694, "calibration/distribution_entropy_10": 0.5680532576297793, "calibration/distribution_entropy_100": 0.3125931234346516, "calibration/ece": 0.14599718654737326, "calibration/mean_confidence": 0.7607226890899895, "calibration/unique_confidence_per_question": 0.021354166666666667, "calibration/unique_confidences": 8.2, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0008680555555555802, "completions/max_length": 3281.2, "completions/max_terminated_length": 3281.2, "completions/mean_length": 996.7343872070312, "completions/mean_terminated_length": 997.6031860351562, "completions/min_length": 0.0, "completions/min_terminated_length": 249.2, "epoch": 1.0576923076923077, "grad_norm": 0.0006608100957237184, "learning_rate": 3.605769230769231e-06, "loss": 0.0003, "num_tokens": 1050923655.0, "reward": 1.2805131673812866, "reward_std": 0.1213545024394989, "rewards/accuracy_reward": 0.7131944537162781, "rewards/brier_reward": 0.8487717032432556, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9990451335906982, "rewards/mean_confidence_reward": 0.7509288191795349, "sampling/batch_mean_priority_error": 0.04343561407930108, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.3805555555555556, "sampling/error_ema_max": 0.20249629020690918, "sampling/error_ema_mean": 0.00835284162312746, "sampling/priority_kl": 0.03000011034309864, "sampling/priority_scale": 0.7119822085136548, "sampling/prob_entropy": 10.27895450592041, "sampling/prob_max": 4.6156414464348926e-05, "sampling/prob_min": 1.988572548725642e-05, "sampling/prompt_draws_max": 5.0, "sampling/prompt_draws_mean": 1.051199984550476, "sampling/prompt_draws_total": 31536.0, "sampling/seen_fraction": 0.7209666728973388, "sampling/unseen_fraction": 0.27903332710266116, "signal/accuracy_reward/centered_abs_mean": 0.12660590559244156, "signal/accuracy_reward/group_std_mean": 0.1687839388847351, "signal/accuracy_reward/group_zero_std_frac": 0.5138888895511627, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06330295279622078, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06330295279622078, "signal/advantage_abs_mean": 0.08899948745965958, "signal/advantage_pre_scale_abs_mean": 0.08899948745965958, "signal/advantage_pre_scale_std": 0.17068710923194885, "signal/advantage_std": 0.17068710923194885, "signal/brier_reward/centered_abs_mean": 0.0685460977256298, "signal/brier_reward/group_std_mean": 0.092455093562603, "signal/brier_reward/group_zero_std_frac": 0.20833333432674409, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0342730488628149, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.0342730488628149, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.0018391926772892476, "signal/format_reward/group_std_mean": 0.0051025690510869024, "signal/format_reward/group_zero_std_frac": 0.9722222089767456, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0009195963386446238, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0009195963386446238, "signal/mean_confidence_reward/centered_abs_mean": 0.04989257976412773, "signal/mean_confidence_reward/group_std_mean": 0.06543896198272706, "signal/mean_confidence_reward/group_zero_std_frac": 0.23333333432674408, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 4.989257604393061e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 4.989257604393061e-07, "step": 440 }, { "calibration/aurc": 0.13666503221385767, "calibration/batch_distribution_entropy": 0.7076334146559488, "calibration/batch_entropy_100bins": 0.3924286491471299, "calibration/batch_entropy_10bins": 0.7076334146559488, "calibration/batch_entropy_50bins": 0.4619606563336549, "calibration/batch_uniqueness": 0.5822380897384157, "calibration/confidence_entropy": 0.5120896384533352, "calibration/coverage@0%": 0.09010416666666667, "calibration/coverage@1%": 0.19583333333333333, "calibration/coverage@10%": 0.5733857814593781, "calibration/coverage@15%": 0.6379691147927115, "calibration/coverage@20%": 0.7316844271070772, "calibration/coverage@25%": 0.7901442291088179, "calibration/coverage@30%": 0.8031732217110807, "calibration/coverage@5%": 0.4139550264550264, "calibration/distribution_entropy_10": 0.7076334146559488, "calibration/distribution_entropy_100": 0.3924286491471299, "calibration/ece": 0.13634455038542825, "calibration/mean_confidence": 0.6688398879460401, "calibration/unique_confidence_per_question": 0.0234375, "calibration/unique_confidences": 9.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00277777777777779, "completions/max_length": 3636.2, "completions/max_terminated_length": 3636.2, "completions/mean_length": 991.1034912109375, "completions/mean_terminated_length": 993.92783203125, "completions/min_length": 0.0, "completions/min_terminated_length": 248.6, "epoch": 1.0697115384615385, "grad_norm": 0.000556574494112283, "learning_rate": 3.575721153846154e-06, "loss": -0.0028, "num_tokens": 1065407151.0, "reward": 1.31825795173645, "reward_std": 0.10573276579380035, "rewards/accuracy_reward": 0.7677951216697693, "rewards/brier_reward": 0.8717447876930237, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9969617843627929, "rewards/mean_confidence_reward": 0.7086805701255798, "sampling/batch_mean_priority_error": 0.02726226362076916, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.35, "sampling/error_ema_max": 0.20249629020690918, "sampling/error_ema_mean": 0.00838301219046116, "sampling/priority_kl": 0.029999752715229987, "sampling/priority_scale": 0.7115830718772486, "sampling/prob_entropy": 10.278950309753418, "sampling/prob_max": 4.62854171928484e-05, "sampling/prob_min": 1.960499575943686e-05, "sampling/prompt_draws_max": 5.6, "sampling/prompt_draws_mean": 1.0631999969482422, "sampling/prompt_draws_total": 31896.0, "sampling/seen_fraction": 0.7251199960708619, "sampling/unseen_fraction": 0.2748800039291382, "signal/accuracy_reward/centered_abs_mean": 0.11357964277267456, "signal/accuracy_reward/group_std_mean": 0.15543469190597534, "signal/accuracy_reward/group_zero_std_frac": 0.5388888835906982, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05678982138633728, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05678982138633728, "signal/advantage_abs_mean": 0.07521249949932099, "signal/advantage_pre_scale_abs_mean": 0.07521249949932099, "signal/advantage_pre_scale_std": 0.15374746918678284, "signal/advantage_std": 0.15374746918678284, "signal/brier_reward/centered_abs_mean": 0.05601532757282257, "signal/brier_reward/group_std_mean": 0.07705468088388442, "signal/brier_reward/group_zero_std_frac": 0.14444444626569747, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.028007663786411285, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.028007663786411285, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.004931640531867743, "signal/format_reward/group_std_mean": 0.009485878376290202, "signal/format_reward/group_zero_std_frac": 0.9583333373069763, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0024658202659338714, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0024658202659338714, "signal/mean_confidence_reward/centered_abs_mean": 0.05166775062680244, "signal/mean_confidence_reward/group_std_mean": 0.0685989424586296, "signal/mean_confidence_reward/group_zero_std_frac": 0.14722222536802293, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.166774883491598e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.166774883491598e-07, "step": 445 }, { "calibration/aurc": 0.17119985867040605, "calibration/batch_distribution_entropy": 0.6938781727692345, "calibration/batch_entropy_100bins": 0.37935902900642526, "calibration/batch_entropy_10bins": 0.6938781727692345, "calibration/batch_entropy_50bins": 0.44657531096869957, "calibration/batch_uniqueness": 0.5243673203869619, "calibration/confidence_entropy": 0.504338539628425, "calibration/coverage@0%": 0.017754569190600523, "calibration/coverage@1%": 0.017754569190600523, "calibration/coverage@10%": 0.4992860639686684, "calibration/coverage@15%": 0.5238019473455179, "calibration/coverage@20%": 0.6404686140121845, "calibration/coverage@25%": 0.7448759791122714, "calibration/coverage@30%": 0.7949480526544821, "calibration/coverage@5%": 0.37684127502175807, "calibration/distribution_entropy_10": 0.6938781727692345, "calibration/distribution_entropy_100": 0.37935902900642526, "calibration/ece": 0.10688887075718015, "calibration/mean_confidence": 0.6589066579634466, "calibration/unique_confidence_per_question": 0.022395833333333334, "calibration/unique_confidences": 8.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0006944444444444642, "completions/max_length": 3458.6, "completions/max_terminated_length": 3458.6, "completions/mean_length": 973.0631225585937, "completions/mean_terminated_length": 973.7551879882812, "completions/min_length": 139.0, "completions/min_terminated_length": 275.4, "epoch": 1.0817307692307692, "grad_norm": 0.0006742961122654378, "learning_rate": 3.5456730769230774e-06, "loss": 0.0009, "num_tokens": 1079686790.0, "reward": 1.3066105604171754, "reward_std": 0.1120601698756218, "rewards/accuracy_reward": 0.7502604126930237, "rewards/brier_reward": 0.8643359184265137, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9986111164093018, "rewards/mean_confidence_reward": 0.6820138692855835, "sampling/batch_mean_priority_error": 0.03446128472222223, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.3777777777777778, "sampling/error_ema_max": 0.20249629020690918, "sampling/error_ema_mean": 0.008412830717861652, "sampling/priority_kl": 0.03000001423060894, "sampling/priority_scale": 0.7117285073036328, "sampling/prob_entropy": 10.278955078125, "sampling/prob_max": 4.643480133381672e-05, "sampling/prob_min": 1.9433926718193105e-05, "sampling/prompt_draws_max": 6.0, "sampling/prompt_draws_mean": 1.0752000093460083, "sampling/prompt_draws_total": 32256.0, "sampling/seen_fraction": 0.7297666668891907, "sampling/unseen_fraction": 0.2702333331108093, "signal/accuracy_reward/centered_abs_mean": 0.13707139641046523, "signal/accuracy_reward/group_std_mean": 0.1787209153175354, "signal/accuracy_reward/group_zero_std_frac": 0.49166667461395264, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06853569820523261, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06853569820523261, "signal/advantage_abs_mean": 0.08267289400100708, "signal/advantage_pre_scale_abs_mean": 0.08267289400100708, "signal/advantage_pre_scale_std": 0.15754978358745575, "signal/advantage_std": 0.15754978358745575, "signal/brier_reward/centered_abs_mean": 0.06549736708402634, "signal/brier_reward/group_std_mean": 0.08823522627353668, "signal/brier_reward/group_zero_std_frac": 0.14166666865348815, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03274868354201317, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03274868354201317, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.0026150173507630825, "signal/format_reward/group_std_mean": 0.006526978686451912, "signal/format_reward/group_zero_std_frac": 0.9666666626930237, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0013075086753815413, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0013075086753815413, "signal/mean_confidence_reward/centered_abs_mean": 0.06105956807732582, "signal/mean_confidence_reward/group_std_mean": 0.07881901264190674, "signal/mean_confidence_reward/group_zero_std_frac": 0.1472222238779068, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.105956799729029e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.105956799729029e-07, "step": 450 }, { "epoch": 1.0817307692307692, "eval_calibration/aurc": 0.1456926255290758, "eval_calibration/batch_distribution_entropy": 0.6996202405927568, "eval_calibration/batch_entropy_100bins": 0.3839213205044793, "eval_calibration/batch_entropy_10bins": 0.6996202405927568, "eval_calibration/batch_entropy_50bins": 0.45194596670295084, "eval_calibration/batch_uniqueness": 0.5007183364839319, "eval_calibration/confidence_entropy": 0.4720367910302038, "eval_calibration/coverage@0%": 0.0, "eval_calibration/coverage@1%": 0.0, "eval_calibration/coverage@10%": 0.45304347826086955, "eval_calibration/coverage@15%": 0.6547826086956522, "eval_calibration/coverage@20%": 0.7965217391304348, "eval_calibration/coverage@25%": 0.88, "eval_calibration/coverage@30%": 0.9817391304347826, "eval_calibration/coverage@5%": 0.0, "eval_calibration/distribution_entropy_10": 0.6996202405927568, "eval_calibration/distribution_entropy_100": 0.3839213205044793, "eval_calibration/ece": 0.021217391304347813, "eval_calibration/mean_confidence": 0.6989565217391304, "eval_calibration/unique_confidence_per_question": 0.0078125, "eval_calibration/unique_confidences": 9, "eval_completions/clipped_ratio": 0.001736111111111105, "eval_completions/max_length": 2455.6666666666665, "eval_completions/max_terminated_length": 2455.6666666666665, "eval_completions/mean_length": 979.8416849772135, "eval_completions/mean_terminated_length": 981.5661824544271, "eval_completions/min_length": 290.1666666666667, "eval_completions/min_terminated_length": 340.8333333333333, "eval_loss": 0.0, "eval_num_tokens": 1079686790.0, "eval_reward": 1.2733750542004902, "eval_reward_std": 0.30162295202414197, "eval_rewards/accuracy_reward": 0.6979166666666666, "eval_rewards/brier_reward": 0.850555549065272, "eval_rewards/confidence_one_or_zero": 0.0, "eval_rewards/format_reward": 0.9982638855775198, "eval_rewards/mean_confidence_reward": 0.6977430780728658, "eval_runtime": 147.3716, "eval_samples_per_second": 6.786, "eval_signal/accuracy_reward/centered_abs_mean": 0.4115668386220932, "eval_signal/accuracy_reward/group_std_mean": 0.4599919815858205, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2057834193110466, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2057834193110466, "eval_signal/advantage_abs_mean": 0.25963140030701953, "eval_signal/advantage_pre_scale_abs_mean": 0.25963140030701953, "eval_signal/advantage_pre_scale_std": 0.2986646344264348, "eval_signal/advantage_std": 0.2986646344264348, "eval_signal/brier_reward/centered_abs_mean": 0.15869846940040588, "eval_signal/brier_reward/group_std_mean": 0.21192201475302377, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.07934923470020294, "eval_signal/brier_reward/weight": 0.5, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.07934923470020294, "eval_signal/confidence_one_or_zero/centered_abs_mean": 0.0, "eval_signal/confidence_one_or_zero/group_std_mean": 0.0, "eval_signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "eval_signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "eval_signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/centered_abs_mean": 0.0033637151742974916, "eval_signal/format_reward/group_std_mean": 0.009820927555362383, "eval_signal/format_reward/group_zero_std_frac": 0.9444444477558136, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0016818575871487458, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0016818575871487458, "eval_signal/mean_confidence_reward/centered_abs_mean": 0.2031846543153127, "eval_signal/mean_confidence_reward/group_std_mean": 0.2419320916136106, "eval_signal/mean_confidence_reward/group_zero_std_frac": 0.0, "eval_signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 2.031846577210672e-06, "eval_signal/mean_confidence_reward/weight": 9.999999747378752e-06, "eval_signal/mean_confidence_reward/weighted_centered_abs_mean": 2.031846577210672e-06, "eval_steps_per_second": 0.041, "step": 450 }, { "epoch": 1.0817307692307692, "step": 450, "train_probe_calibration/aurc": 0.09937392783360088, "train_probe_calibration/batch_distribution_entropy": 0.6987087019871145, "train_probe_calibration/batch_entropy_100bins": 0.38393012567447593, "train_probe_calibration/batch_entropy_10bins": 0.6987087019871145, "train_probe_calibration/batch_entropy_50bins": 0.4519563320066044, "train_probe_calibration/batch_uniqueness": 0.4989943289224953, "train_probe_calibration/confidence_entropy": 0.4712346490131543, "train_probe_calibration/coverage@0%": 0.0, "train_probe_calibration/coverage@1%": 0.0, "train_probe_calibration/coverage@10%": 0.6556521739130434, "train_probe_calibration/coverage@15%": 0.7930434782608695, "train_probe_calibration/coverage@20%": 0.8852173913043478, "train_probe_calibration/coverage@25%": 0.9808695652173913, "train_probe_calibration/coverage@30%": 1.0, "train_probe_calibration/coverage@5%": 0.4539130434782609, "train_probe_calibration/distribution_entropy_10": 0.6987087019871145, "train_probe_calibration/distribution_entropy_100": 0.38393012567447593, "train_probe_calibration/ece": 0.04843478260869559, "train_probe_calibration/mean_confidence": 0.699391304347826, "train_probe_calibration/unique_confidence_per_question": 0.0078125, "train_probe_calibration/unique_confidences": 9, "train_probe_completions/clipped_ratio": 0.005034722222222232, "train_probe_completions/max_length": 2869.8333333333335, "train_probe_completions/max_terminated_length": 2869.8333333333335, "train_probe_completions/mean_length": 993.7186075846354, "train_probe_completions/mean_terminated_length": 998.8824666341146, "train_probe_completions/min_length": 166.16666666666666, "train_probe_completions/min_terminated_length": 271.5, "train_probe_loss": 0.0, "train_probe_num_tokens": 1079686790.0, "train_probe_reward": 1.3065911928812664, "train_probe_reward_std": 0.27413928508758545, "train_probe_rewards/accuracy_reward": 0.7465277711550394, "train_probe_rewards/brier_reward": 0.8683767517407736, "train_probe_rewards/confidence_one_or_zero": 0.0, "train_probe_rewards/format_reward": 0.9982638955116272, "train_probe_rewards/mean_confidence_reward": 0.6981770694255829, "train_probe_runtime": 171.2718, "train_probe_samples_per_second": 5.839, "train_probe_signal/accuracy_reward/centered_abs_mean": 0.3681640625, "train_probe_signal/accuracy_reward/group_std_mean": 0.43382490674654645, "train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0, "train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.18408203125, "train_probe_signal/accuracy_reward/weight": 0.5, "train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.18408203125, "train_probe_signal/advantage_abs_mean": 0.22712557514508566, "train_probe_signal/advantage_pre_scale_abs_mean": 0.22712557514508566, "train_probe_signal/advantage_pre_scale_std": 0.27301861842473346, "train_probe_signal/advantage_std": 0.27301861842473346, "train_probe_signal/brier_reward/centered_abs_mean": 0.13910971085230509, "train_probe_signal/brier_reward/group_std_mean": 0.18819688508907953, "train_probe_signal/brier_reward/group_zero_std_frac": 0.0, "train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.06955485542615254, "train_probe_signal/brier_reward/weight": 0.5, "train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.06955485542615254, "train_probe_signal/confidence_one_or_zero/centered_abs_mean": 0.0, "train_probe_signal/confidence_one_or_zero/group_std_mean": 0.0, "train_probe_signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "train_probe_signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "train_probe_signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "train_probe_signal/format_reward/centered_abs_mean": 0.0033637151742974916, "train_probe_signal/format_reward/group_std_mean": 0.009820927555362383, "train_probe_signal/format_reward/group_zero_std_frac": 0.944444457689921, "train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0016818575871487458, "train_probe_signal/format_reward/weight": 0.5, "train_probe_signal/format_reward/weighted_centered_abs_mean": 0.0016818575871487458, "train_probe_signal/mean_confidence_reward/centered_abs_mean": 0.2051540638009707, "train_probe_signal/mean_confidence_reward/group_std_mean": 0.2430253947774569, "train_probe_signal/mean_confidence_reward/group_zero_std_frac": 0.0, "train_probe_signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 2.051540567056994e-06, "train_probe_signal/mean_confidence_reward/weight": 9.999999747378752e-06, "train_probe_signal/mean_confidence_reward/weighted_centered_abs_mean": 2.051540567056994e-06, "train_probe_steps_per_second": 0.035 }, { "calibration/aurc": 0.12143333430388896, "calibration/batch_distribution_entropy": 0.6992091580531643, "calibration/batch_entropy_100bins": 0.3825769752796119, "calibration/batch_entropy_10bins": 0.6992091580531643, "calibration/batch_entropy_50bins": 0.4503634252555604, "calibration/batch_uniqueness": 0.5406986655035744, "calibration/confidence_entropy": 0.48598507484551545, "calibration/coverage@0%": 0.07604166666666666, "calibration/coverage@1%": 0.10104166666666667, "calibration/coverage@10%": 0.3617887837249782, "calibration/coverage@15%": 0.7390489012184508, "calibration/coverage@20%": 0.8335223563968668, "calibration/coverage@25%": 0.9101827676240208, "calibration/coverage@30%": 1.0, "calibration/coverage@5%": 0.32266236945169713, "calibration/distribution_entropy_10": 0.6992091580531643, "calibration/distribution_entropy_100": 0.3825769752796119, "calibration/ece": 0.15797976501305483, "calibration/mean_confidence": 0.6753209312445605, "calibration/unique_confidence_per_question": 0.02239583333333333, "calibration/unique_confidences": 8.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.001388888888888906, "completions/max_length": 3361.2, "completions/max_terminated_length": 3361.2, "completions/mean_length": 1019.3338500976563, "completions/mean_terminated_length": 1020.7703857421875, "completions/min_length": 0.0, "completions/min_terminated_length": 292.8, "epoch": 1.09375, "grad_norm": 0.0006985415820963681, "learning_rate": 3.5156250000000003e-06, "loss": 0.0013, "num_tokens": 1094556460.0, "reward": 1.2889756202697753, "reward_std": 0.12136874049901962, "rewards/accuracy_reward": 0.7276909708976745, "rewards/brier_reward": 0.8517222285270691, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.998524296283722, "rewards/mean_confidence_reward": 0.6873003482818604, "sampling/batch_mean_priority_error": 0.04208454441084229, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.3555555555555555, "sampling/error_ema_max": 0.20249629020690918, "sampling/error_ema_mean": 0.008451656624674796, "sampling/priority_kl": 0.02999986857175827, "sampling/priority_scale": 0.7116955935722217, "sampling/prob_entropy": 10.278954315185548, "sampling/prob_max": 4.657706740545109e-05, "sampling/prob_min": 1.9494253501761703e-05, "sampling/prompt_draws_max": 6.0, "sampling/prompt_draws_mean": 1.0871999979019165, "sampling/prompt_draws_total": 32616.0, "sampling/seen_fraction": 0.7340999960899353, "sampling/unseen_fraction": 0.2659000039100647, "signal/accuracy_reward/centered_abs_mean": 0.14761827141046524, "signal/accuracy_reward/group_std_mean": 0.1924243152141571, "signal/accuracy_reward/group_zero_std_frac": 0.46111111640930175, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07380913570523262, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07380913570523262, "signal/advantage_abs_mean": 0.08975814431905746, "signal/advantage_pre_scale_abs_mean": 0.08975814431905746, "signal/advantage_pre_scale_std": 0.16560624539852142, "signal/advantage_std": 0.16560624539852142, "signal/brier_reward/centered_abs_mean": 0.07768742889165878, "signal/brier_reward/group_std_mean": 0.10408626347780228, "signal/brier_reward/group_zero_std_frac": 0.21111111640930175, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03884371444582939, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03884371444582939, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.0028591578942723573, "signal/format_reward/group_std_mean": 0.008347788266837597, "signal/format_reward/group_zero_std_frac": 0.9527777910232544, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0014295789471361787, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0014295789471361787, "signal/mean_confidence_reward/centered_abs_mean": 0.06654405444860459, "signal/mean_confidence_reward/group_std_mean": 0.0879412516951561, "signal/mean_confidence_reward/group_zero_std_frac": 0.23055556118488313, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.654405297012999e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.654405297012999e-07, "step": 455 }, { "calibration/aurc": 0.10392280207853939, "calibration/batch_distribution_entropy": 0.5815910248331363, "calibration/batch_entropy_100bins": 0.3225027846771071, "calibration/batch_entropy_10bins": 0.5815910248331363, "calibration/batch_entropy_50bins": 0.37964506007055215, "calibration/batch_uniqueness": 0.2745551215277778, "calibration/confidence_entropy": 0.437338301182815, "calibration/coverage@0%": 0.15208333333333332, "calibration/coverage@1%": 0.15208333333333332, "calibration/coverage@10%": 0.6505208333333334, "calibration/coverage@15%": 0.8020833333333334, "calibration/coverage@20%": 0.8286458333333334, "calibration/coverage@25%": 0.8755208333333334, "calibration/coverage@30%": 0.9072916666666668, "calibration/coverage@5%": 0.4265625, "calibration/distribution_entropy_10": 0.5815910248331363, "calibration/distribution_entropy_100": 0.3225027846771071, "calibration/ece": 0.11749999999999991, "calibration/mean_confidence": 0.6855208333333334, "calibration/unique_confidence_per_question": 0.022916666666666665, "calibration/unique_confidences": 8.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0012152777777777902, "completions/max_length": 3590.2, "completions/max_terminated_length": 3590.2, "completions/mean_length": 1007.80869140625, "completions/mean_terminated_length": 1009.020703125, "completions/min_length": 68.8, "completions/min_terminated_length": 282.2, "epoch": 1.1057692307692308, "grad_norm": 0.0006103526102378964, "learning_rate": 3.4855769230769233e-06, "loss": -0.0002, "num_tokens": 1109287856.0, "reward": 1.2962172985076905, "reward_std": 0.10616759359836578, "rewards/accuracy_reward": 0.7383680582046509, "rewards/brier_reward": 0.8554409980773926, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9986111044883728, "rewards/mean_confidence_reward": 0.7227604031562805, "sampling/batch_mean_priority_error": 0.05214705427814047, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.34444444444444444, "sampling/error_ema_max": 0.20249629020690918, "sampling/error_ema_mean": 0.008502156287431718, "sampling/priority_kl": 0.029999998584389686, "sampling/priority_scale": 0.711798530840315, "sampling/prob_entropy": 10.278949737548828, "sampling/prob_max": 4.672173818107694e-05, "sampling/prob_min": 1.955234147317242e-05, "sampling/prompt_draws_max": 6.0, "sampling/prompt_draws_mean": 1.0992000102996826, "sampling/prompt_draws_total": 32976.0, "sampling/seen_fraction": 0.7384333372116089, "sampling/unseen_fraction": 0.26156666278839114, "signal/accuracy_reward/centered_abs_mean": 0.10821397453546525, "signal/accuracy_reward/group_std_mean": 0.14976944923400878, "signal/accuracy_reward/group_zero_std_frac": 0.5472222328186035, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05410698726773262, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05410698726773262, "signal/advantage_abs_mean": 0.0763231635093689, "signal/advantage_pre_scale_abs_mean": 0.0763231635093689, "signal/advantage_pre_scale_std": 0.15467407703399658, "signal/advantage_std": 0.15467407703399658, "signal/brier_reward/centered_abs_mean": 0.07721022069454193, "signal/brier_reward/group_std_mean": 0.10425049513578415, "signal/brier_reward/group_zero_std_frac": 0.2888888865709305, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03860511034727097, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03860511034727097, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.0025933159748092295, "signal/format_reward/group_std_mean": 0.005929096881300211, "signal/format_reward/group_zero_std_frac": 0.9722222089767456, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0012966579874046148, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0012966579874046148, "signal/mean_confidence_reward/centered_abs_mean": 0.062055671215057374, "signal/mean_confidence_reward/group_std_mean": 0.08193774446845055, "signal/mean_confidence_reward/group_zero_std_frac": 0.3277777761220932, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.205566592143441e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.205566592143441e-07, "step": 460 }, { "calibration/aurc": 0.08059233839576513, "calibration/batch_distribution_entropy": 0.4605690366871271, "calibration/batch_entropy_100bins": 0.2529573163787063, "calibration/batch_entropy_10bins": 0.4605690366871271, "calibration/batch_entropy_50bins": 0.29777726002592436, "calibration/batch_uniqueness": -0.004366139578864012, "calibration/confidence_entropy": 0.4054791820657573, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.16510416666666666, "calibration/coverage@10%": 0.5729166666666667, "calibration/coverage@15%": 0.9066294060052218, "calibration/coverage@20%": 0.9269704634464752, "calibration/coverage@25%": 0.954622225848564, "calibration/coverage@30%": 0.9963446475195823, "calibration/coverage@5%": 0.5007316800714924, "calibration/distribution_entropy_10": 0.4605690366871271, "calibration/distribution_entropy_100": 0.2529573163787063, "calibration/ece": 0.10463188608756416, "calibration/mean_confidence": 0.7691945551062004, "calibration/unique_confidence_per_question": 0.021875, "calibration/unique_confidences": 8.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.003125, "completions/max_length": 3376.0, "completions/max_terminated_length": 3376.0, "completions/mean_length": 1011.2415893554687, "completions/mean_terminated_length": 1014.4902587890625, "completions/min_length": 65.2, "completions/min_terminated_length": 284.6, "epoch": 1.1177884615384615, "grad_norm": 0.000557567982468754, "learning_rate": 3.4555288461538466e-06, "loss": -0.0009, "num_tokens": 1124026927.0, "reward": 1.2991013526916504, "reward_std": 0.11327702552080154, "rewards/accuracy_reward": 0.7419270873069763, "rewards/brier_reward": 0.8595590353012085, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9967013835906983, "rewards/mean_confidence_reward": 0.7529860854148864, "sampling/batch_mean_priority_error": 0.053493539116674746, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.32499999999999996, "sampling/error_ema_max": 0.20249629020690918, "sampling/error_ema_mean": 0.008554048836231232, "sampling/priority_kl": 0.029999839141964912, "sampling/priority_scale": 0.711510258843191, "sampling/prob_entropy": 10.278951644897461, "sampling/prob_max": 4.685054227593355e-05, "sampling/prob_min": 1.9613161566667258e-05, "sampling/prompt_draws_max": 6.0, "sampling/prompt_draws_mean": 1.1112000226974488, "sampling/prompt_draws_total": 33336.0, "sampling/seen_fraction": 0.7422599911689758, "sampling/unseen_fraction": 0.25774000883102416, "signal/accuracy_reward/centered_abs_mean": 0.12089301198720932, "signal/accuracy_reward/group_std_mean": 0.16041539907455443, "signal/accuracy_reward/group_zero_std_frac": 0.5388889014720917, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06044650599360466, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06044650599360466, "signal/advantage_abs_mean": 0.08364090770483017, "signal/advantage_pre_scale_abs_mean": 0.08364090770483017, "signal/advantage_pre_scale_std": 0.16810988783836364, "signal/advantage_std": 0.16810988783836364, "signal/brier_reward/centered_abs_mean": 0.07260150983929634, "signal/brier_reward/group_std_mean": 0.09838626384735108, "signal/brier_reward/group_zero_std_frac": 0.35833334028720853, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03630075491964817, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03630075491964817, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.00542534728301689, "signal/format_reward/group_std_mean": 0.01027821134775877, "signal/format_reward/group_zero_std_frac": 0.955555546283722, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.002712673641508445, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.002712673641508445, "signal/mean_confidence_reward/centered_abs_mean": 0.061685122549533844, "signal/mean_confidence_reward/group_std_mean": 0.08196858763694763, "signal/mean_confidence_reward/group_zero_std_frac": 0.397222226858139, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.168512015847227e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.168512015847227e-07, "step": 465 }, { "calibration/aurc": 0.17243657087985084, "calibration/batch_distribution_entropy": 0.4785682077586254, "calibration/batch_entropy_100bins": 0.26017619224627336, "calibration/batch_entropy_10bins": 0.4785682077586254, "calibration/batch_entropy_50bins": 0.3062752038967914, "calibration/batch_uniqueness": 0.013543680388338326, "calibration/confidence_entropy": 0.4068504298385657, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.4390859952770221, "calibration/coverage@15%": 0.6420314426741135, "calibration/coverage@20%": 0.7307174703019095, "calibration/coverage@25%": 0.7581151832460733, "calibration/coverage@30%": 0.7664921465968586, "calibration/coverage@5%": 0.13838120104438642, "calibration/distribution_entropy_10": 0.4785682077586254, "calibration/distribution_entropy_100": 0.26017619224627336, "calibration/ece": 0.11765368328499626, "calibration/mean_confidence": 0.7780121137494821, "calibration/unique_confidence_per_question": 0.02239583333333333, "calibration/unique_confidences": 8.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.003472222222222232, "completions/max_length": 3773.8, "completions/max_terminated_length": 3773.8, "completions/mean_length": 1097.7507080078126, "completions/mean_terminated_length": 1101.61884765625, "completions/min_length": 0.0, "completions/min_terminated_length": 287.6, "epoch": 1.1298076923076923, "grad_norm": 0.0006817008252255619, "learning_rate": 3.4254807692307695e-06, "loss": -0.0026, "num_tokens": 1139782103.0, "reward": 1.2943272113800048, "reward_std": 0.1254418209195137, "rewards/accuracy_reward": 0.7381944417953491, "rewards/brier_reward": 0.8540902853012085, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9963541626930237, "rewards/mean_confidence_reward": 0.7828645825386047, "sampling/batch_mean_priority_error": 0.05845517749135944, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.38333333333333336, "sampling/error_ema_max": 0.20249629020690918, "sampling/error_ema_mean": 0.008615374192595482, "sampling/priority_kl": 0.030000054463744163, "sampling/priority_scale": 0.7117458641761913, "sampling/prob_entropy": 10.278947448730468, "sampling/prob_max": 4.699580094893463e-05, "sampling/prob_min": 1.9646077998913824e-05, "sampling/prompt_draws_max": 6.0, "sampling/prompt_draws_mean": 1.123200011253357, "sampling/prompt_draws_total": 33696.0, "sampling/seen_fraction": 0.7464800000190734, "sampling/unseen_fraction": 0.2535199999809265, "signal/accuracy_reward/centered_abs_mean": 0.11816406399011611, "signal/accuracy_reward/group_std_mean": 0.1576286733150482, "signal/accuracy_reward/group_zero_std_frac": 0.5388888955116272, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05908203199505806, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05908203199505806, "signal/advantage_abs_mean": 0.09323482066392899, "signal/advantage_pre_scale_abs_mean": 0.09323482066392899, "signal/advantage_pre_scale_std": 0.18418655097484588, "signal/advantage_std": 0.18418655097484588, "signal/brier_reward/centered_abs_mean": 0.08546116352081298, "signal/brier_reward/group_std_mean": 0.11382831633090973, "signal/brier_reward/group_zero_std_frac": 0.37777777910232546, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04273058176040649, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.04273058176040649, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.006152343726716936, "signal/format_reward/group_std_mean": 0.011012036725878716, "signal/format_reward/group_zero_std_frac": 0.9555555582046509, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.003076171863358468, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.003076171863358468, "signal/mean_confidence_reward/centered_abs_mean": 0.0545052208006382, "signal/mean_confidence_reward/group_std_mean": 0.0735972911119461, "signal/mean_confidence_reward/group_zero_std_frac": 0.43888888955116273, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.450521996408497e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.450521996408497e-07, "step": 470 }, { "calibration/aurc": 0.2685868515086937, "calibration/batch_distribution_entropy": 0.41914427430692525, "calibration/batch_entropy_100bins": 0.22963451571698973, "calibration/batch_entropy_10bins": 0.41914427430692525, "calibration/batch_entropy_50bins": 0.2703220364467047, "calibration/batch_uniqueness": -0.12729341363433064, "calibration/confidence_entropy": 0.3964918726873754, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.13368146214099216, "calibration/coverage@15%": 0.14203655352480418, "calibration/coverage@20%": 0.30809399477806787, "calibration/coverage@25%": 0.5036196234712107, "calibration/coverage@30%": 0.5528253401126838, "calibration/coverage@5%": 0.12741514360313316, "calibration/distribution_entropy_10": 0.41914427430692525, "calibration/distribution_entropy_100": 0.22963451571698973, "calibration/ece": 0.1977212607645093, "calibration/mean_confidence": 0.7959806739842428, "calibration/unique_confidence_per_question": 0.02239583333333333, "calibration/unique_confidences": 8.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.002777777777777812, "completions/max_length": 4003.8, "completions/max_terminated_length": 4003.8, "completions/mean_length": 1113.821875, "completions/mean_terminated_length": 1116.839892578125, "completions/min_length": 0.0, "completions/min_terminated_length": 334.0, "epoch": 1.1418269230769231, "grad_norm": 0.0006080775056034327, "learning_rate": 3.3954326923076925e-06, "loss": -0.0022, "num_tokens": 1155714611.0, "reward": 1.2394050359725952, "reward_std": 0.12434252202510834, "rewards/accuracy_reward": 0.6710069417953491, "rewards/brier_reward": 0.8106519103050231, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9971354246139527, "rewards/mean_confidence_reward": 0.7947829842567444, "sampling/batch_mean_priority_error": 0.09056192310100955, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.3333333333333333, "sampling/error_ema_max": 0.20249629020690918, "sampling/error_ema_mean": 0.00870037693530321, "sampling/priority_kl": 0.03000004291534424, "sampling/priority_scale": 0.7121343434089795, "sampling/prob_entropy": 10.278951644897461, "sampling/prob_max": 4.71480016130954e-05, "sampling/prob_min": 1.961133639269974e-05, "sampling/prompt_draws_max": 6.0, "sampling/prompt_draws_mean": 1.1351999759674072, "sampling/prompt_draws_total": 34056.0, "sampling/seen_fraction": 0.7507666826248169, "sampling/unseen_fraction": 0.24923331737518312, "signal/accuracy_reward/centered_abs_mean": 0.10917968600988388, "signal/accuracy_reward/group_std_mean": 0.1482059821486473, "signal/accuracy_reward/group_zero_std_frac": 0.5611111283302307, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05458984300494194, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05458984300494194, "signal/advantage_abs_mean": 0.09205847829580308, "signal/advantage_pre_scale_abs_mean": 0.09205847829580308, "signal/advantage_pre_scale_std": 0.18328936994075776, "signal/advantage_std": 0.18328936994075776, "signal/brier_reward/centered_abs_mean": 0.08727029114961624, "signal/brier_reward/group_std_mean": 0.1160641372203827, "signal/brier_reward/group_zero_std_frac": 0.3999999940395355, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04363514557480812, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.04363514557480812, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.00482313372194767, "signal/format_reward/group_std_mean": 0.009565300587564707, "signal/format_reward/group_zero_std_frac": 0.9583333253860473, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.002411566860973835, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.002411566860973835, "signal/mean_confidence_reward/centered_abs_mean": 0.054275186359882356, "signal/mean_confidence_reward/group_std_mean": 0.0711105227470398, "signal/mean_confidence_reward/group_zero_std_frac": 0.46111111640930175, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.42751837429023e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.42751837429023e-07, "step": 475 }, { "calibration/aurc": 0.16509835396682498, "calibration/batch_distribution_entropy": 0.5817777701404863, "calibration/batch_entropy_100bins": 0.3238100239509861, "calibration/batch_entropy_10bins": 0.5817777701404863, "calibration/batch_entropy_50bins": 0.3811839209928083, "calibration/batch_uniqueness": 0.2799376063331884, "calibration/confidence_entropy": 0.44051279536966287, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.46918061174987463, "calibration/coverage@15%": 0.5476342486155917, "calibration/coverage@20%": 0.5873400706598004, "calibration/coverage@25%": 0.7842809230271447, "calibration/coverage@30%": 0.8664871199194095, "calibration/coverage@5%": 0.0, "calibration/distribution_entropy_10": 0.5817777701404863, "calibration/distribution_entropy_100": 0.3238100239509861, "calibration/ece": 0.08974379444828766, "calibration/mean_confidence": 0.7054703479311134, "calibration/unique_confidence_per_question": 0.0234375, "calibration/unique_confidences": 9.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.004600694444444442, "completions/max_length": 3721.2, "completions/max_terminated_length": 3721.2, "completions/mean_length": 1088.9832641601563, "completions/mean_terminated_length": 1094.0543090820313, "completions/min_length": 0.0, "completions/min_terminated_length": 288.6, "epoch": 1.1538461538461537, "grad_norm": 0.0006057985010556877, "learning_rate": 3.365384615384616e-06, "loss": -0.0027, "num_tokens": 1171333938.0, "reward": 1.311488938331604, "reward_std": 0.1304037630558014, "rewards/accuracy_reward": 0.7541666626930237, "rewards/brier_reward": 0.8734835147857666, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.995312488079071, "rewards/mean_confidence_reward": 0.7591232657432556, "sampling/batch_mean_priority_error": 0.0432108694971293, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.3416666666666667, "sampling/error_ema_max": 0.20249629020690918, "sampling/error_ema_mean": 0.008764292113482952, "sampling/priority_kl": 0.029999880865216256, "sampling/priority_scale": 0.7124254643684254, "sampling/prob_entropy": 10.278950309753418, "sampling/prob_max": 4.7294204705394804e-05, "sampling/prob_min": 1.9665095533127896e-05, "sampling/prompt_draws_max": 6.0, "sampling/prompt_draws_mean": 1.1471999883651733, "sampling/prompt_draws_total": 34416.0, "sampling/seen_fraction": 0.7548466563224793, "sampling/unseen_fraction": 0.24515334367752076, "signal/accuracy_reward/centered_abs_mean": 0.1254557266831398, "signal/accuracy_reward/group_std_mean": 0.17018299102783202, "signal/accuracy_reward/group_zero_std_frac": 0.5000000119209289, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0627278633415699, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0627278633415699, "signal/advantage_abs_mean": 0.09320852905511856, "signal/advantage_pre_scale_abs_mean": 0.09320852905511856, "signal/advantage_pre_scale_std": 0.18777829706668853, "signal/advantage_std": 0.18777829706668853, "signal/brier_reward/centered_abs_mean": 0.07927191704511642, "signal/brier_reward/group_std_mean": 0.10883116573095322, "signal/brier_reward/group_zero_std_frac": 0.29166666865348817, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03963595852255821, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03963595852255821, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.008398437546566129, "signal/format_reward/group_std_mean": 0.017096645571291445, "signal/format_reward/group_zero_std_frac": 0.925000011920929, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0041992187732830645, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0041992187732830645, "signal/mean_confidence_reward/centered_abs_mean": 0.05635145679116249, "signal/mean_confidence_reward/group_std_mean": 0.07729533314704895, "signal/mean_confidence_reward/group_zero_std_frac": 0.347222226858139, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.635145612359338e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.635145612359338e-07, "step": 480 }, { "calibration/aurc": 0.14424101126813588, "calibration/batch_distribution_entropy": 0.6692289095471307, "calibration/batch_entropy_100bins": 0.3789358043962861, "calibration/batch_entropy_10bins": 0.6692289095471307, "calibration/batch_entropy_50bins": 0.44607709780536053, "calibration/batch_uniqueness": 0.5106775389038136, "calibration/confidence_entropy": 0.49288429135846296, "calibration/coverage@0%": 0.11137752381993904, "calibration/coverage@1%": 0.11137752381993904, "calibration/coverage@10%": 0.31678621490802383, "calibration/coverage@15%": 0.5255120119247787, "calibration/coverage@20%": 0.7083988676090485, "calibration/coverage@25%": 0.8282933124057072, "calibration/coverage@30%": 0.9046870268989673, "calibration/coverage@5%": 0.2693903815746904, "calibration/distribution_entropy_10": 0.6692289095471307, "calibration/distribution_entropy_100": 0.3789358043962861, "calibration/ece": 0.08697532913278365, "calibration/mean_confidence": 0.6685785178846475, "calibration/unique_confidence_per_question": 0.021875, "calibration/unique_confidences": 8.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.003732638888888906, "completions/max_length": 3593.6, "completions/max_terminated_length": 3593.6, "completions/mean_length": 1068.6515747070312, "completions/mean_terminated_length": 1072.6423828125, "completions/min_length": 0.0, "completions/min_terminated_length": 291.6, "epoch": 1.1658653846153846, "grad_norm": 0.00042986380867660046, "learning_rate": 3.3353365384615388e-06, "loss": -0.0028, "num_tokens": 1186752388.0, "reward": 1.291651439666748, "reward_std": 0.10780009925365448, "rewards/accuracy_reward": 0.7271701335906983, "rewards/brier_reward": 0.8598515510559082, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9962673664093018, "rewards/mean_confidence_reward": 0.695164954662323, "sampling/batch_mean_priority_error": 0.040754491299291075, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.38888888888888884, "sampling/error_ema_max": 0.20249629020690918, "sampling/error_ema_mean": 0.008808174729347229, "sampling/priority_kl": 0.03000020273029804, "sampling/priority_scale": 0.7133005202980712, "sampling/prob_entropy": 10.278949737548828, "sampling/prob_max": 4.746251870528795e-05, "sampling/prob_min": 1.9713817891897634e-05, "sampling/prompt_draws_max": 6.0, "sampling/prompt_draws_mean": 1.1592000007629395, "sampling/prompt_draws_total": 34776.0, "sampling/seen_fraction": 0.7593999981880188, "sampling/unseen_fraction": 0.2406000018119812, "signal/accuracy_reward/centered_abs_mean": 0.11838650405406952, "signal/accuracy_reward/group_std_mean": 0.16019512414932252, "signal/accuracy_reward/group_zero_std_frac": 0.5333333373069763, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05919325202703476, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05919325202703476, "signal/advantage_abs_mean": 0.07694000229239464, "signal/advantage_pre_scale_abs_mean": 0.07694000229239464, "signal/advantage_pre_scale_std": 0.15797904133796692, "signal/advantage_std": 0.15797904133796692, "signal/brier_reward/centered_abs_mean": 0.06782751232385635, "signal/brier_reward/group_std_mean": 0.09157582372426987, "signal/brier_reward/group_zero_std_frac": 0.2694444477558136, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.033913756161928175, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.033913756161928175, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.006906467000953853, "signal/format_reward/group_std_mean": 0.015525073930621146, "signal/format_reward/group_zero_std_frac": 0.9277777910232544, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0034532335004769264, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0034532335004769264, "signal/mean_confidence_reward/centered_abs_mean": 0.05602431297302246, "signal/mean_confidence_reward/group_std_mean": 0.07353781908750534, "signal/mean_confidence_reward/group_zero_std_frac": 0.29722222983837127, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.602431428997079e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.602431428997079e-07, "step": 485 }, { "calibration/aurc": 0.07385052157310985, "calibration/batch_distribution_entropy": 0.6865127587476862, "calibration/batch_entropy_100bins": 0.38104507949249855, "calibration/batch_entropy_10bins": 0.6865127587476862, "calibration/batch_entropy_50bins": 0.44856010232083676, "calibration/batch_uniqueness": 0.5568101683217519, "calibration/confidence_entropy": 0.5090127053537694, "calibration/coverage@0%": 0.10793520942408377, "calibration/coverage@1%": 0.17477594049457984, "calibration/coverage@10%": 0.6945750061244469, "calibration/coverage@15%": 0.8549916329618883, "calibration/coverage@20%": 0.9536036771105308, "calibration/coverage@25%": 0.9838120104438642, "calibration/coverage@30%": 1.0, "calibration/coverage@5%": 0.488348268242808, "calibration/distribution_entropy_10": 0.6865127587476862, "calibration/distribution_entropy_100": 0.38104507949249855, "calibration/ece": 0.15832489957513127, "calibration/mean_confidence": 0.6925591290156485, "calibration/unique_confidence_per_question": 0.020833333333333332, "calibration/unique_confidences": 8.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006250000000000022, "completions/max_length": 3622.0, "completions/max_terminated_length": 3622.0, "completions/mean_length": 1027.5976806640624, "completions/mean_terminated_length": 1034.0362060546875, "completions/min_length": 0.0, "completions/min_terminated_length": 233.6, "epoch": 1.1778846153846154, "grad_norm": 0.0005065092700533569, "learning_rate": 3.3052884615384617e-06, "loss": -0.0066, "num_tokens": 1201653385.0, "reward": 1.2866623878479004, "reward_std": 0.10978196710348129, "rewards/accuracy_reward": 0.7243923664093017, "rewards/brier_reward": 0.8551692605018616, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.99375, "rewards/mean_confidence_reward": 0.6556684136390686, "sampling/batch_mean_priority_error": 0.031650594283802194, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.28055555555555556, "sampling/error_ema_max": 0.20249629020690918, "sampling/error_ema_mean": 0.008839315734803677, "sampling/priority_kl": 0.029999951645731927, "sampling/priority_scale": 0.7136028229957446, "sampling/prob_entropy": 10.278948211669922, "sampling/prob_max": 4.760698429890908e-05, "sampling/prob_min": 1.976646854018327e-05, "sampling/prompt_draws_max": 6.0, "sampling/prompt_draws_mean": 1.1711999893188476, "sampling/prompt_draws_total": 35136.0, "sampling/seen_fraction": 0.7632866621017456, "sampling/unseen_fraction": 0.23671333789825438, "signal/accuracy_reward/centered_abs_mean": 0.11789821982383727, "signal/accuracy_reward/group_std_mean": 0.16054655611515045, "signal/accuracy_reward/group_zero_std_frac": 0.5277777850627899, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05894910991191864, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05894910991191864, "signal/advantage_abs_mean": 0.07676194682717323, "signal/advantage_pre_scale_abs_mean": 0.07676194682717323, "signal/advantage_pre_scale_std": 0.1554905205965042, "signal/advantage_std": 0.1554905205965042, "signal/brier_reward/centered_abs_mean": 0.0649530403316021, "signal/brier_reward/group_std_mean": 0.09048911035060883, "signal/brier_reward/group_zero_std_frac": 0.1361111134290695, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03247652016580105, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03247652016580105, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.01105685792863369, "signal/format_reward/group_std_mean": 0.02208307832479477, "signal/format_reward/group_zero_std_frac": 0.9055555701255799, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.005528428964316845, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.005528428964316845, "signal/mean_confidence_reward/centered_abs_mean": 0.06184353157877922, "signal/mean_confidence_reward/group_std_mean": 0.08153964579105377, "signal/mean_confidence_reward/group_zero_std_frac": 0.14444444626569747, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.184352855598263e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.184352855598263e-07, "step": 490 }, { "calibration/aurc": 0.06241843717141251, "calibration/batch_distribution_entropy": 0.6022017445135018, "calibration/batch_entropy_100bins": 0.32593844547926, "calibration/batch_entropy_10bins": 0.6022017445135018, "calibration/batch_entropy_50bins": 0.3836894643783205, "calibration/batch_uniqueness": 0.3656484331557499, "calibration/confidence_entropy": 0.46177567770797545, "calibration/coverage@0%": 0.31406934962374067, "calibration/coverage@1%": 0.4649504376487631, "calibration/coverage@10%": 0.7808990655147648, "calibration/coverage@15%": 0.8241748638517699, "calibration/coverage@20%": 0.8997835679177838, "calibration/coverage@25%": 0.9518068588025022, "calibration/coverage@30%": 0.975, "calibration/coverage@5%": 0.5931389600144851, "calibration/distribution_entropy_10": 0.6022017445135018, "calibration/distribution_entropy_100": 0.32593844547926, "calibration/ece": 0.11784205514953403, "calibration/mean_confidence": 0.7561079918549011, "calibration/unique_confidence_per_question": 0.0203125, "calibration/unique_confidences": 7.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.005555555555555536, "completions/max_length": 3740.2, "completions/max_terminated_length": 3740.2, "completions/mean_length": 1011.887255859375, "completions/mean_terminated_length": 1017.530224609375, "completions/min_length": 0.0, "completions/min_terminated_length": 273.2, "epoch": 1.1899038461538463, "grad_norm": 0.000543592032045126, "learning_rate": 3.2752403846153846e-06, "loss": -0.0054, "num_tokens": 1216408662.0, "reward": 1.2978950262069702, "reward_std": 0.11456762105226517, "rewards/accuracy_reward": 0.7465277791023255, "rewards/brier_reward": 0.8549774646759033, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9942708373069763, "rewards/mean_confidence_reward": 0.6990104198455811, "sampling/batch_mean_priority_error": 0.03569228065040018, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.3388888888888889, "sampling/error_ema_max": 0.20283221006393432, "sampling/error_ema_mean": 0.008870593085885049, "sampling/priority_kl": 0.030000014975667, "sampling/priority_scale": 0.7138257563346997, "sampling/prob_entropy": 10.278950691223145, "sampling/prob_max": 4.774942208314314e-05, "sampling/prob_min": 1.9820158195216207e-05, "sampling/prompt_draws_max": 6.0, "sampling/prompt_draws_mean": 1.1832000017166138, "sampling/prompt_draws_total": 35496.0, "sampling/seen_fraction": 0.7670066595077515, "sampling/unseen_fraction": 0.23299334049224854, "signal/accuracy_reward/centered_abs_mean": 0.1231119766831398, "signal/accuracy_reward/group_std_mean": 0.16388897597789764, "signal/accuracy_reward/group_zero_std_frac": 0.5333333373069763, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0615559883415699, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0615559883415699, "signal/advantage_abs_mean": 0.0827570840716362, "signal/advantage_pre_scale_abs_mean": 0.0827570840716362, "signal/advantage_pre_scale_std": 0.1666143447160721, "signal/advantage_std": 0.1666143447160721, "signal/brier_reward/centered_abs_mean": 0.06436868607997895, "signal/brier_reward/group_std_mean": 0.08770309686660767, "signal/brier_reward/group_zero_std_frac": 0.2194444417953491, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.032184343039989474, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.032184343039989474, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.010123697854578496, "signal/format_reward/group_std_mean": 0.019488899409770964, "signal/format_reward/group_zero_std_frac": 0.919444465637207, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.005061848927289248, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.005061848927289248, "signal/mean_confidence_reward/centered_abs_mean": 0.05872992873191833, "signal/mean_confidence_reward/group_std_mean": 0.07616192176938057, "signal/mean_confidence_reward/group_zero_std_frac": 0.2194444417953491, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.872992744571093e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.872992744571093e-07, "step": 495 }, { "calibration/aurc": 0.16765371594935913, "calibration/batch_distribution_entropy": 0.6470495126669717, "calibration/batch_entropy_100bins": 0.35226262364466754, "calibration/batch_entropy_10bins": 0.6470495126669717, "calibration/batch_entropy_50bins": 0.414677860993004, "calibration/batch_uniqueness": 0.45946199632610396, "calibration/confidence_entropy": 0.47391215467122316, "calibration/coverage@0%": 0.09608355091383812, "calibration/coverage@1%": 0.22036553524804176, "calibration/coverage@10%": 0.43185378590078327, "calibration/coverage@15%": 0.44960835509138375, "calibration/coverage@20%": 0.6095449847693646, "calibration/coverage@25%": 0.7096945713664056, "calibration/coverage@30%": 0.8529917319408181, "calibration/coverage@5%": 0.2710182767624021, "calibration/distribution_entropy_10": 0.6470495126669717, "calibration/distribution_entropy_100": 0.35226262364466754, "calibration/ece": 0.138503590078329, "calibration/mean_confidence": 0.7416133594429939, "calibration/unique_confidence_per_question": 0.020833333333333332, "calibration/unique_confidences": 8.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0051215277777778125, "completions/max_length": 3449.0, "completions/max_terminated_length": 3449.0, "completions/mean_length": 971.3076416015625, "completions/mean_terminated_length": 976.3692626953125, "completions/min_length": 0.0, "completions/min_terminated_length": 305.8, "epoch": 1.2019230769230769, "grad_norm": 0.0005834887851960957, "learning_rate": 3.245192307692308e-06, "loss": -0.0049, "num_tokens": 1230721070.0, "reward": 1.2864141702651977, "reward_std": 0.1127560317516327, "rewards/accuracy_reward": 0.7265624880790711, "rewards/brier_reward": 0.8513723850250244, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9948784708976746, "rewards/mean_confidence_reward": 0.7555121302604675, "sampling/batch_mean_priority_error": 0.04447198323683641, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.3277777777777777, "sampling/error_ema_max": 0.2029161900281906, "sampling/error_ema_mean": 0.008906039968132972, "sampling/priority_kl": 0.03000001348555088, "sampling/priority_scale": 0.7144269287819043, "sampling/prob_entropy": 10.278946876525879, "sampling/prob_max": 4.7904710663715376e-05, "sampling/prob_min": 1.9869897369062527e-05, "sampling/prompt_draws_max": 6.0, "sampling/prompt_draws_mean": 1.19520001411438, "sampling/prompt_draws_total": 35856.0, "sampling/seen_fraction": 0.7709800004959106, "sampling/unseen_fraction": 0.22901999950408936, "signal/accuracy_reward/centered_abs_mean": 0.10986327975988389, "signal/accuracy_reward/group_std_mean": 0.14956827759742736, "signal/accuracy_reward/group_zero_std_frac": 0.5527777910232544, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05493163987994194, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05493163987994194, "signal/advantage_abs_mean": 0.08156569451093673, "signal/advantage_pre_scale_abs_mean": 0.08156569451093673, "signal/advantage_pre_scale_std": 0.1688571810722351, "signal/advantage_std": 0.1688571810722351, "signal/brier_reward/centered_abs_mean": 0.06213839054107666, "signal/brier_reward/group_std_mean": 0.08537867367267608, "signal/brier_reward/group_zero_std_frac": 0.24166666865348815, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03106919527053833, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03106919527053833, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.007731119659729302, "signal/format_reward/group_std_mean": 0.013650832884013652, "signal/format_reward/group_zero_std_frac": 0.9444444417953491, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.003865559829864651, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.003865559829864651, "signal/mean_confidence_reward/centered_abs_mean": 0.04973362013697624, "signal/mean_confidence_reward/group_std_mean": 0.06599054262042045, "signal/mean_confidence_reward/group_zero_std_frac": 0.27777777910232543, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 4.973361910742824e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 4.973361910742824e-07, "step": 500 }, { "epoch": 1.2019230769230769, "eval_calibration/aurc": 0.12491768383573112, "eval_calibration/batch_distribution_entropy": 0.589795021571096, "eval_calibration/batch_entropy_100bins": 0.3239887453964619, "eval_calibration/batch_entropy_10bins": 0.589795021571096, "eval_calibration/batch_entropy_50bins": 0.3813943089867336, "eval_calibration/batch_uniqueness": 0.29058405650919084, "eval_calibration/confidence_entropy": 0.44710805611226434, "eval_calibration/coverage@0%": 0.0, "eval_calibration/coverage@1%": 0.0, "eval_calibration/coverage@10%": 0.5693112467306015, "eval_calibration/coverage@15%": 0.7471665213600698, "eval_calibration/coverage@20%": 0.8186573670444638, "eval_calibration/coverage@25%": 0.939843068875327, "eval_calibration/coverage@30%": 1.0, "eval_calibration/coverage@5%": 0.0, "eval_calibration/distribution_entropy_10": 0.589795021571096, "eval_calibration/distribution_entropy_100": 0.3239887453964619, "eval_calibration/ece": 0.05074106364428948, "eval_calibration/mean_confidence": 0.760767218831735, "eval_calibration/unique_confidence_per_question": 0.0078125, "eval_calibration/unique_confidences": 9, "eval_completions/clipped_ratio": 0.004340277777777808, "eval_completions/max_length": 2845.3333333333335, "eval_completions/max_terminated_length": 2845.3333333333335, "eval_completions/mean_length": 986.4887390136719, "eval_completions/mean_terminated_length": 990.8467610677084, "eval_completions/min_length": 75.0, "eval_completions/min_terminated_length": 329.8333333333333, "eval_loss": 0.0, "eval_num_tokens": 1230721070.0, "eval_reward": 1.288488507270813, "eval_reward_std": 0.31713973979155224, "eval_rewards/accuracy_reward": 0.7265625, "eval_rewards/brier_reward": 0.8547395765781403, "eval_rewards/confidence_one_or_zero": 0.0, "eval_rewards/format_reward": 0.995659738779068, "eval_rewards/mean_confidence_reward": 0.7574652334054311, "eval_runtime": 207.7192, "eval_samples_per_second": 4.814, "eval_signal/accuracy_reward/centered_abs_mean": 0.3863389740387599, "eval_signal/accuracy_reward/group_std_mean": 0.44475344320138294, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.19316948701937994, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.19316948701937994, "eval_signal/advantage_abs_mean": 0.2657373497883479, "eval_signal/advantage_pre_scale_abs_mean": 0.2657373497883479, "eval_signal/advantage_pre_scale_std": 0.3151099781195323, "eval_signal/advantage_std": 0.3151099781195323, "eval_signal/brier_reward/centered_abs_mean": 0.16554688413937887, "eval_signal/brier_reward/group_std_mean": 0.22295028964678446, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.08277344206968944, "eval_signal/brier_reward/weight": 0.5, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.08277344206968944, "eval_signal/confidence_one_or_zero/centered_abs_mean": 0.0, "eval_signal/confidence_one_or_zero/group_std_mean": 0.0, "eval_signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "eval_signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "eval_signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/centered_abs_mean": 0.008409287935743729, "eval_signal/format_reward/group_std_mean": 0.02455231888840596, "eval_signal/format_reward/group_zero_std_frac": 0.8611111442248026, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.004204643967871864, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.004204643967871864, "eval_signal/mean_confidence_reward/centered_abs_mean": 0.16469181329011917, "eval_signal/mean_confidence_reward/group_std_mean": 0.20532790819803873, "eval_signal/mean_confidence_reward/group_zero_std_frac": 0.0, "eval_signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 1.646918121878116e-06, "eval_signal/mean_confidence_reward/weight": 9.999999747378752e-06, "eval_signal/mean_confidence_reward/weighted_centered_abs_mean": 1.646918121878116e-06, "eval_steps_per_second": 0.029, "step": 500 }, { "epoch": 1.2019230769230769, "step": 500, "train_probe_calibration/aurc": 0.0909489939142326, "train_probe_calibration/batch_distribution_entropy": 0.5765734888146544, "train_probe_calibration/batch_entropy_100bins": 0.315147910452945, "train_probe_calibration/batch_entropy_10bins": 0.5765734888146544, "train_probe_calibration/batch_entropy_50bins": 0.37098702113473647, "train_probe_calibration/batch_uniqueness": 0.26795141206308043, "train_probe_calibration/confidence_entropy": 0.44427701730624236, "train_probe_calibration/coverage@0%": 0.0, "train_probe_calibration/coverage@1%": 0.0, "train_probe_calibration/coverage@10%": 0.6899563318777293, "train_probe_calibration/coverage@15%": 0.8296943231441049, "train_probe_calibration/coverage@20%": 0.8951965065502183, "train_probe_calibration/coverage@25%": 1.0, "train_probe_calibration/coverage@30%": 1.0, "train_probe_calibration/coverage@5%": 0.0, "train_probe_calibration/distribution_entropy_10": 0.5765734888146544, "train_probe_calibration/distribution_entropy_100": 0.315147910452945, "train_probe_calibration/ece": 0.05414847161572035, "train_probe_calibration/mean_confidence": 0.7689082969432315, "train_probe_calibration/unique_confidence_per_question": 0.006944444444444444, "train_probe_calibration/unique_confidences": 8, "train_probe_completions/clipped_ratio": 0.005208333333333333, "train_probe_completions/max_length": 3334.8333333333335, "train_probe_completions/max_terminated_length": 3334.8333333333335, "train_probe_completions/mean_length": 1006.7073262532552, "train_probe_completions/mean_terminated_length": 1012.0302327473959, "train_probe_completions/min_length": 105.16666666666667, "train_probe_completions/min_terminated_length": 273.6666666666667, "train_probe_loss": 0.0, "train_probe_num_tokens": 1230721070.0, "train_probe_reward": 1.3158843914667766, "train_probe_reward_std": 0.3002795527378718, "train_probe_rewards/accuracy_reward": 0.7630208233992258, "train_probe_rewards/brier_reward": 0.8748090267181396, "train_probe_rewards/confidence_one_or_zero": 0.0, "train_probe_rewards/format_reward": 0.9939236144224802, "train_probe_rewards/mean_confidence_reward": 0.7642360925674438, "train_probe_runtime": 203.9359, "train_probe_samples_per_second": 4.904, "train_probe_signal/accuracy_reward/centered_abs_mean": 0.3511827290058136, "train_probe_signal/accuracy_reward/group_std_mean": 0.42282938460508984, "train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0, "train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1755913645029068, "train_probe_signal/accuracy_reward/weight": 0.5, "train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.1755913645029068, "train_probe_signal/advantage_abs_mean": 0.24092507114013037, "train_probe_signal/advantage_pre_scale_abs_mean": 0.24092507114013037, "train_probe_signal/advantage_pre_scale_std": 0.3001803358395894, "train_probe_signal/advantage_std": 0.3001803358395894, "train_probe_signal/brier_reward/centered_abs_mean": 0.14480145027240118, "train_probe_signal/brier_reward/group_std_mean": 0.2011666273077329, "train_probe_signal/brier_reward/group_zero_std_frac": 0.0, "train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.07240072513620059, "train_probe_signal/brier_reward/weight": 0.5, "train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.07240072513620059, "train_probe_signal/confidence_one_or_zero/centered_abs_mean": 0.0, "train_probe_signal/confidence_one_or_zero/group_std_mean": 0.0, "train_probe_signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "train_probe_signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "train_probe_signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "train_probe_signal/format_reward/centered_abs_mean": 0.011773003110041222, "train_probe_signal/format_reward/group_std_mean": 0.034373246443768345, "train_probe_signal/format_reward/group_zero_std_frac": 0.8055555820465088, "train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.005886501555020611, "train_probe_signal/format_reward/weight": 0.5, "train_probe_signal/format_reward/weighted_centered_abs_mean": 0.005886501555020611, "train_probe_signal/mean_confidence_reward/centered_abs_mean": 0.16013995806376138, "train_probe_signal/mean_confidence_reward/group_std_mean": 0.19872909784317017, "train_probe_signal/mean_confidence_reward/group_zero_std_frac": 0.0, "train_probe_signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 1.601399503670109e-06, "train_probe_signal/mean_confidence_reward/weight": 9.999999747378752e-06, "train_probe_signal/mean_confidence_reward/weighted_centered_abs_mean": 1.601399503670109e-06, "train_probe_steps_per_second": 0.029 }, { "calibration/aurc": 0.10340563136783469, "calibration/batch_distribution_entropy": 0.5784544873790741, "calibration/batch_entropy_100bins": 0.31161774907468726, "calibration/batch_entropy_10bins": 0.5784544873790741, "calibration/batch_entropy_50bins": 0.36683137227778395, "calibration/batch_uniqueness": 0.2844652792158847, "calibration/confidence_entropy": 0.4593182530564782, "calibration/coverage@0%": 0.24305086599319234, "calibration/coverage@1%": 0.2702759968832447, "calibration/coverage@10%": 0.6897732052417556, "calibration/coverage@15%": 0.7698497067630805, "calibration/coverage@20%": 0.816460947659054, "calibration/coverage@25%": 0.8677412059784956, "calibration/coverage@30%": 0.8724409448818898, "calibration/coverage@5%": 0.5355485582848349, "calibration/distribution_entropy_10": 0.5784544873790741, "calibration/distribution_entropy_100": 0.31161774907468726, "calibration/ece": 0.16430700985154553, "calibration/mean_confidence": 0.7618425281355379, "calibration/unique_confidence_per_question": 0.020312499999999997, "calibration/unique_confidences": 7.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.004166666666666674, "completions/max_length": 3440.4, "completions/max_terminated_length": 3440.4, "completions/mean_length": 986.4609375, "completions/mean_terminated_length": 990.5824584960938, "completions/min_length": 0.0, "completions/min_terminated_length": 299.0, "epoch": 1.2139423076923077, "grad_norm": 0.0005349395796656609, "learning_rate": 3.215144230769231e-06, "loss": -0.0044, "num_tokens": 1245170988.0, "reward": 1.2705562591552735, "reward_std": 0.11584860682487488, "rewards/accuracy_reward": 0.7032986044883728, "rewards/brier_reward": 0.8420520901679993, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9957465291023254, "rewards/mean_confidence_reward": 0.7627951264381408, "sampling/batch_mean_priority_error": 0.04778075370517789, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.32222222222222224, "sampling/error_ema_max": 0.2029161900281906, "sampling/error_ema_mean": 0.008955173008143903, "sampling/priority_kl": 0.029999982565641403, "sampling/priority_scale": 0.7150903284782544, "sampling/prob_entropy": 10.278950881958007, "sampling/prob_max": 4.8062007408589125e-05, "sampling/prob_min": 1.9918856924050488e-05, "sampling/prompt_draws_max": 6.0, "sampling/prompt_draws_mean": 1.207200002670288, "sampling/prompt_draws_total": 36216.0, "sampling/seen_fraction": 0.7749266624450684, "sampling/unseen_fraction": 0.22507333755493164, "signal/accuracy_reward/centered_abs_mean": 0.11077474057674408, "signal/accuracy_reward/group_std_mean": 0.15214249938726426, "signal/accuracy_reward/group_zero_std_frac": 0.544444453716278, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05538737028837204, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05538737028837204, "signal/advantage_abs_mean": 0.08241928070783615, "signal/advantage_pre_scale_abs_mean": 0.08241928070783615, "signal/advantage_pre_scale_std": 0.1683724880218506, "signal/advantage_std": 0.1683724880218506, "signal/brier_reward/centered_abs_mean": 0.06432757750153542, "signal/brier_reward/group_std_mean": 0.08972443491220475, "signal/brier_reward/group_zero_std_frac": 0.29166666865348817, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03216378875076771, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03216378875076771, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.007557508582249284, "signal/format_reward/group_std_mean": 0.014655602350831031, "signal/format_reward/group_zero_std_frac": 0.9388889074325562, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.003778754291124642, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.003778754291124642, "signal/mean_confidence_reward/centered_abs_mean": 0.04986003115773201, "signal/mean_confidence_reward/group_std_mean": 0.06695612743496895, "signal/mean_confidence_reward/group_zero_std_frac": 0.30833334028720855, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 4.986002693385671e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 4.986002693385671e-07, "step": 505 }, { "calibration/aurc": 0.12091567329964312, "calibration/batch_distribution_entropy": 0.6790414307027339, "calibration/batch_entropy_100bins": 0.38284855021121017, "calibration/batch_entropy_10bins": 0.6790414307027339, "calibration/batch_entropy_50bins": 0.45068311887099244, "calibration/batch_uniqueness": 0.5521023320397787, "calibration/confidence_entropy": 0.49995264723742083, "calibration/coverage@0%": 0.11880101149236241, "calibration/coverage@1%": 0.13916654674040418, "calibration/coverage@10%": 0.46862419520911713, "calibration/coverage@15%": 0.7173659117308444, "calibration/coverage@20%": 0.8121888555950741, "calibration/coverage@25%": 0.9218670120543025, "calibration/coverage@30%": 0.9643044619422572, "calibration/coverage@5%": 0.2675304676952913, "calibration/distribution_entropy_10": 0.6790414307027339, "calibration/distribution_entropy_100": 0.38284855021121017, "calibration/ece": 0.10470707582080965, "calibration/mean_confidence": 0.6953173891367366, "calibration/unique_confidence_per_question": 0.021354166666666667, "calibration/unique_confidences": 8.2, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00625, "completions/max_length": 3732.4, "completions/max_terminated_length": 3732.4, "completions/mean_length": 1025.9516357421876, "completions/mean_terminated_length": 1032.6173217773437, "completions/min_length": 0.0, "completions/min_terminated_length": 279.8, "epoch": 1.2259615384615385, "grad_norm": 0.000531997240614146, "learning_rate": 3.185096153846154e-06, "loss": -0.0031, "num_tokens": 1260136031.0, "reward": 1.290212059020996, "reward_std": 0.10709643959999085, "rewards/accuracy_reward": 0.7328124880790711, "rewards/brier_reward": 0.8538472175598144, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.99375, "rewards/mean_confidence_reward": 0.7211632013320923, "sampling/batch_mean_priority_error": 0.03896267267771803, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.35, "sampling/error_ema_max": 0.2029161900281906, "sampling/error_ema_mean": 0.008996725827455521, "sampling/priority_kl": 0.03000003769993782, "sampling/priority_scale": 0.7160103619331494, "sampling/prob_entropy": 10.278948211669922, "sampling/prob_max": 4.822869086638093e-05, "sampling/prob_min": 1.9965295723523014e-05, "sampling/prompt_draws_max": 6.0, "sampling/prompt_draws_mean": 1.2192000150680542, "sampling/prompt_draws_total": 36576.0, "sampling/seen_fraction": 0.7789933443069458, "sampling/unseen_fraction": 0.2210066556930542, "signal/accuracy_reward/centered_abs_mean": 0.10720485895872116, "signal/accuracy_reward/group_std_mean": 0.1490975335240364, "signal/accuracy_reward/group_zero_std_frac": 0.544444453716278, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05360242947936058, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05360242947936058, "signal/advantage_abs_mean": 0.07514681965112686, "signal/advantage_pre_scale_abs_mean": 0.07514681965112686, "signal/advantage_pre_scale_std": 0.15790832936763763, "signal/advantage_std": 0.15790832936763763, "signal/brier_reward/centered_abs_mean": 0.05860713720321655, "signal/brier_reward/group_std_mean": 0.08091188073158265, "signal/brier_reward/group_zero_std_frac": 0.23611111789941788, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.029303568601608276, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.029303568601608276, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.005859375, "signal/format_reward/group_std_mean": 0.012872960604727268, "signal/format_reward/group_zero_std_frac": 0.9388889074325562, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0029296875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0029296875, "signal/mean_confidence_reward/centered_abs_mean": 0.049307186901569364, "signal/mean_confidence_reward/group_std_mean": 0.06485608667135238, "signal/mean_confidence_reward/group_zero_std_frac": 0.2500000029802322, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 4.930718716877891e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 4.930718716877891e-07, "step": 510 }, { "calibration/aurc": 0.127601593118219, "calibration/batch_distribution_entropy": 0.7003445364316236, "calibration/batch_entropy_100bins": 0.39342611836657276, "calibration/batch_entropy_10bins": 0.7003445364316236, "calibration/batch_entropy_50bins": 0.4631348609598662, "calibration/batch_uniqueness": 0.6125533215887912, "calibration/confidence_entropy": 0.5242468094631405, "calibration/coverage@0%": 0.08802083333333334, "calibration/coverage@1%": 0.11145833333333335, "calibration/coverage@10%": 0.4462222584856397, "calibration/coverage@15%": 0.6676634573542211, "calibration/coverage@20%": 0.840625, "calibration/coverage@25%": 0.8557291666666668, "calibration/coverage@30%": 0.909375, "calibration/coverage@5%": 0.2366296779808529, "calibration/distribution_entropy_10": 0.7003445364316236, "calibration/distribution_entropy_100": 0.39342611836657276, "calibration/ece": 0.1235987815491731, "calibration/mean_confidence": 0.6859905352480419, "calibration/unique_confidence_per_question": 0.020312499999999997, "calibration/unique_confidences": 7.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.003993055555555558, "completions/max_length": 3653.6, "completions/max_terminated_length": 3653.6, "completions/mean_length": 1023.5614624023438, "completions/mean_terminated_length": 1027.6983764648437, "completions/min_length": 0.0, "completions/min_terminated_length": 238.4, "epoch": 1.2379807692307692, "grad_norm": 0.0005270320107229054, "learning_rate": 3.1550480769230772e-06, "loss": -0.0029, "num_tokens": 1275026531.0, "reward": 1.2900764226913453, "reward_std": 0.10513209253549576, "rewards/accuracy_reward": 0.7271701335906983, "rewards/brier_reward": 0.8571354269981384, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9958333373069763, "rewards/mean_confidence_reward": 0.6929513812065125, "sampling/batch_mean_priority_error": 0.030280168036501465, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.3638888888888889, "sampling/error_ema_max": 0.2029161900281906, "sampling/error_ema_mean": 0.009028730355203152, "sampling/priority_kl": 0.030000071972608566, "sampling/priority_scale": 0.7171281397575513, "sampling/prob_entropy": 10.278954887390137, "sampling/prob_max": 4.840152687393129e-05, "sampling/prob_min": 2.0009274157928304e-05, "sampling/prompt_draws_max": 6.0, "sampling/prompt_draws_mean": 1.2312000036239623, "sampling/prompt_draws_total": 36936.0, "sampling/seen_fraction": 0.7831333398818969, "sampling/unseen_fraction": 0.21686666011810302, "signal/accuracy_reward/centered_abs_mean": 0.11109483391046523, "signal/accuracy_reward/group_std_mean": 0.15123350024223328, "signal/accuracy_reward/group_zero_std_frac": 0.5500000059604645, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05554741695523262, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05554741695523262, "signal/advantage_abs_mean": 0.07481730729341507, "signal/advantage_pre_scale_abs_mean": 0.07481730729341507, "signal/advantage_pre_scale_std": 0.15349577963352204, "signal/advantage_std": 0.15349577963352204, "signal/brier_reward/centered_abs_mean": 0.056370685994625094, "signal/brier_reward/group_std_mean": 0.07771345749497413, "signal/brier_reward/group_zero_std_frac": 0.13888889253139497, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.028185342997312547, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.028185342997312547, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.006966145779006183, "signal/format_reward/group_std_mean": 0.014438749477267265, "signal/format_reward/group_zero_std_frac": 0.9333333492279052, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0034830728895030917, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0034830728895030917, "signal/mean_confidence_reward/centered_abs_mean": 0.05178168341517449, "signal/mean_confidence_reward/group_std_mean": 0.06841083616018295, "signal/mean_confidence_reward/group_zero_std_frac": 0.1416666716337204, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.178168294150964e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.178168294150964e-07, "step": 515 }, { "calibration/aurc": 0.11846951228931728, "calibration/batch_distribution_entropy": 0.6525936085812469, "calibration/batch_entropy_100bins": 0.3623802260025296, "calibration/batch_entropy_10bins": 0.6525936085812469, "calibration/batch_entropy_50bins": 0.42658813878724466, "calibration/batch_uniqueness": 0.4680648818769151, "calibration/confidence_entropy": 0.47899836063768664, "calibration/coverage@0%": 0.15833333333333333, "calibration/coverage@1%": 0.3026041666666667, "calibration/coverage@10%": 0.5026150741710296, "calibration/coverage@15%": 0.6038094458987784, "calibration/coverage@20%": 0.7969448877694695, "calibration/coverage@25%": 0.8788396693687659, "calibration/coverage@30%": 0.8788396693687659, "calibration/coverage@5%": 0.4316835732984293, "calibration/distribution_entropy_10": 0.6525936085812469, "calibration/distribution_entropy_100": 0.3623802260025296, "calibration/ece": 0.14434328991975715, "calibration/mean_confidence": 0.6961930389047614, "calibration/unique_confidence_per_question": 0.021354166666666664, "calibration/unique_confidences": 8.2, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.004774305555555558, "completions/max_length": 3867.0, "completions/max_terminated_length": 3867.0, "completions/mean_length": 1104.88525390625, "completions/mean_terminated_length": 1110.30390625, "completions/min_length": 0.0, "completions/min_terminated_length": 308.2, "epoch": 1.25, "grad_norm": 0.0005092016654089093, "learning_rate": 3.125e-06, "loss": -0.0034, "num_tokens": 1290852057.0, "reward": 1.2876001596450806, "reward_std": 0.11605858206748962, "rewards/accuracy_reward": 0.7191840291023255, "rewards/brier_reward": 0.8607769012451172, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9952256917953491, "rewards/mean_confidence_reward": 0.6877343535423279, "sampling/batch_mean_priority_error": 0.029131544333956733, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.33611111111111114, "sampling/error_ema_max": 0.2029161900281906, "sampling/error_ema_mean": 0.009056623838841916, "sampling/priority_kl": 0.029999974742531778, "sampling/priority_scale": 0.7184703051811084, "sampling/prob_entropy": 10.278947067260741, "sampling/prob_max": 4.858248357777484e-05, "sampling/prob_min": 1.993556652450934e-05, "sampling/prompt_draws_max": 6.2, "sampling/prompt_draws_mean": 1.2431999921798706, "sampling/prompt_draws_total": 37296.0, "sampling/seen_fraction": 0.7873533248901368, "sampling/unseen_fraction": 0.2126466751098633, "signal/accuracy_reward/centered_abs_mean": 0.12505968064069747, "signal/accuracy_reward/group_std_mean": 0.16689604818820952, "signal/accuracy_reward/group_zero_std_frac": 0.5194444537162781, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06252984032034874, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06252984032034874, "signal/advantage_abs_mean": 0.08188586384057998, "signal/advantage_pre_scale_abs_mean": 0.08188586384057998, "signal/advantage_pre_scale_std": 0.16324815154075623, "signal/advantage_std": 0.16324815154075623, "signal/brier_reward/centered_abs_mean": 0.059866245836019516, "signal/brier_reward/group_std_mean": 0.08398651629686356, "signal/brier_reward/group_zero_std_frac": 0.15277777686715127, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.029933122918009758, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.029933122918009758, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.008783637313172221, "signal/format_reward/group_std_mean": 0.02044099196791649, "signal/format_reward/group_zero_std_frac": 0.9000000119209289, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.004391818656586111, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.004391818656586111, "signal/mean_confidence_reward/centered_abs_mean": 0.054735784977674486, "signal/mean_confidence_reward/group_std_mean": 0.07394103705883026, "signal/mean_confidence_reward/group_zero_std_frac": 0.16944444328546523, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.473578539749723e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.473578539749723e-07, "step": 520 }, { "calibration/aurc": 0.09678537048996547, "calibration/batch_distribution_entropy": 0.6171174397568164, "calibration/batch_entropy_100bins": 0.34609810798263696, "calibration/batch_entropy_10bins": 0.6171174397568164, "calibration/batch_entropy_50bins": 0.4074210928966894, "calibration/batch_uniqueness": 0.4126739037344662, "calibration/confidence_entropy": 0.4748172772125029, "calibration/coverage@0%": 0.10976253298153034, "calibration/coverage@1%": 0.22955419964819698, "calibration/coverage@10%": 0.6564760495175418, "calibration/coverage@15%": 0.7377408790067616, "calibration/coverage@20%": 0.7880261492308591, "calibration/coverage@25%": 0.8417837844839132, "calibration/coverage@30%": 0.9145660052441771, "calibration/coverage@5%": 0.5810012932521524, "calibration/distribution_entropy_10": 0.6171174397568164, "calibration/distribution_entropy_100": 0.34609810798263696, "calibration/ece": 0.13466796804572648, "calibration/mean_confidence": 0.7060822233364991, "calibration/unique_confidence_per_question": 0.020833333333333332, "calibration/unique_confidences": 8.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006510416666666652, "completions/max_length": 3772.4, "completions/max_terminated_length": 3772.4, "completions/mean_length": 1085.634765625, "completions/mean_terminated_length": 1092.8089599609375, "completions/min_length": 0.0, "completions/min_terminated_length": 283.4, "epoch": 1.2620192307692308, "grad_norm": 0.0005398774519562721, "learning_rate": 3.094951923076923e-06, "loss": -0.0066, "num_tokens": 1306456009.0, "reward": 1.2903912544250489, "reward_std": 0.11569144278764724, "rewards/accuracy_reward": 0.717187511920929, "rewards/brier_reward": 0.870177960395813, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9934027791023254, "rewards/mean_confidence_reward": 0.7156336784362793, "sampling/batch_mean_priority_error": 0.028870389832563247, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.32222222222222224, "sampling/error_ema_max": 0.2029161900281906, "sampling/error_ema_mean": 0.009079698100686073, "sampling/priority_kl": 0.03000027798116207, "sampling/priority_scale": 0.7196202219231054, "sampling/prob_entropy": 10.27895221710205, "sampling/prob_max": 4.875725426245481e-05, "sampling/prob_min": 1.9516599422786385e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.2551999807357788, "sampling/prompt_draws_total": 37656.0, "sampling/seen_fraction": 0.7913266539573669, "sampling/unseen_fraction": 0.20867334604263305, "signal/accuracy_reward/centered_abs_mean": 0.11032986044883727, "signal/accuracy_reward/group_std_mean": 0.1527843713760376, "signal/accuracy_reward/group_zero_std_frac": 0.5333333373069763, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05516493022441864, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05516493022441864, "signal/advantage_abs_mean": 0.07982016056776046, "signal/advantage_pre_scale_abs_mean": 0.07982016056776046, "signal/advantage_pre_scale_std": 0.16589364111423494, "signal/advantage_std": 0.16589364111423494, "signal/brier_reward/centered_abs_mean": 0.0613716222345829, "signal/brier_reward/group_std_mean": 0.08781401515007019, "signal/brier_reward/group_zero_std_frac": 0.2194444477558136, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03068581111729145, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03068581111729145, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.010568576585501432, "signal/format_reward/group_std_mean": 0.020994319766759872, "signal/format_reward/group_zero_std_frac": 0.9083333373069763, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.005284288292750716, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.005284288292750716, "signal/mean_confidence_reward/centered_abs_mean": 0.053071295469999315, "signal/mean_confidence_reward/group_std_mean": 0.07041076719760894, "signal/mean_confidence_reward/group_zero_std_frac": 0.2416666716337204, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.307129413267831e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.307129413267831e-07, "step": 525 }, { "calibration/aurc": 0.1391010387250999, "calibration/batch_distribution_entropy": 0.5834054079074263, "calibration/batch_entropy_100bins": 0.3178072726414959, "calibration/batch_entropy_10bins": 0.5834054079074263, "calibration/batch_entropy_50bins": 0.37411757927497885, "calibration/batch_uniqueness": 0.3022310683542095, "calibration/confidence_entropy": 0.4543434565360574, "calibration/coverage@0%": 0.09109947643979058, "calibration/coverage@1%": 0.09109947643979058, "calibration/coverage@10%": 0.5162209256626522, "calibration/coverage@15%": 0.5621755017452007, "calibration/coverage@20%": 0.7390406849912741, "calibration/coverage@25%": 0.8044257198952879, "calibration/coverage@30%": 0.8922829406631761, "calibration/coverage@5%": 0.2702445559307206, "calibration/distribution_entropy_10": 0.5834054079074263, "calibration/distribution_entropy_100": 0.3178072726414959, "calibration/ece": 0.11636329422580065, "calibration/mean_confidence": 0.7494788875712548, "calibration/unique_confidence_per_question": 0.020833333333333332, "calibration/unique_confidences": 8.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.005902777777777768, "completions/max_length": 3653.6, "completions/max_terminated_length": 3653.6, "completions/mean_length": 1049.12138671875, "completions/mean_terminated_length": 1055.4906372070313, "completions/min_length": 0.0, "completions/min_terminated_length": 298.0, "epoch": 1.2740384615384617, "grad_norm": 0.000520919100381434, "learning_rate": 3.0649038461538464e-06, "loss": -0.0057, "num_tokens": 1321664575.0, "reward": 1.2939966201782227, "reward_std": 0.11089726239442825, "rewards/accuracy_reward": 0.7355034589767456, "rewards/brier_reward": 0.8584644198417664, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9940104126930237, "rewards/mean_confidence_reward": 0.7482899308204651, "sampling/batch_mean_priority_error": 0.04006456962425671, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.32222222222222224, "sampling/error_ema_max": 0.2244912087917328, "sampling/error_ema_mean": 0.009115009754896163, "sampling/priority_kl": 0.029999951645731927, "sampling/priority_scale": 0.7208636105293408, "sampling/prob_entropy": 10.278956031799316, "sampling/prob_max": 4.893360455753282e-05, "sampling/prob_min": 1.955622537934687e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.267199993133545, "sampling/prompt_draws_total": 38016.0, "sampling/seen_fraction": 0.7952666640281677, "sampling/unseen_fraction": 0.20473333597183227, "signal/accuracy_reward/centered_abs_mean": 0.1049533411860466, "signal/accuracy_reward/group_std_mean": 0.1418349489569664, "signal/accuracy_reward/group_zero_std_frac": 0.5805555582046509, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0524766705930233, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0524766705930233, "signal/advantage_abs_mean": 0.07906464189291, "signal/advantage_pre_scale_abs_mean": 0.07906464189291, "signal/advantage_pre_scale_std": 0.16779219806194307, "signal/advantage_std": 0.16779219806194307, "signal/brier_reward/centered_abs_mean": 0.06269395127892494, "signal/brier_reward/group_std_mean": 0.0868382677435875, "signal/brier_reward/group_zero_std_frac": 0.26944445073604584, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03134697563946247, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03134697563946247, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.009792751725763082, "signal/format_reward/group_std_mean": 0.01961270458996296, "signal/format_reward/group_zero_std_frac": 0.9111111164093018, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.004896375862881541, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.004896375862881541, "signal/mean_confidence_reward/centered_abs_mean": 0.05228136330842972, "signal/mean_confidence_reward/group_std_mean": 0.06960389465093612, "signal/mean_confidence_reward/group_zero_std_frac": 0.2888888955116272, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.228136160440045e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.228136160440045e-07, "step": 530 }, { "calibration/aurc": 0.14580923682604757, "calibration/batch_distribution_entropy": 0.5803569578541288, "calibration/batch_entropy_100bins": 0.31552345276778493, "calibration/batch_entropy_10bins": 0.5803569578541288, "calibration/batch_entropy_50bins": 0.3714291034715423, "calibration/batch_uniqueness": 0.31153982143640346, "calibration/confidence_entropy": 0.4751440675952664, "calibration/coverage@0%": 0.20455620412013178, "calibration/coverage@1%": 0.2254986124970951, "calibration/coverage@10%": 0.3195822454308094, "calibration/coverage@15%": 0.6283453002610966, "calibration/coverage@20%": 0.6971306570931244, "calibration/coverage@25%": 0.8098125354000028, "calibration/coverage@30%": 0.8970188017185405, "calibration/coverage@5%": 0.31382308312714446, "calibration/distribution_entropy_10": 0.5803569578541288, "calibration/distribution_entropy_100": 0.31552345276778493, "calibration/ece": 0.13262880646542285, "calibration/mean_confidence": 0.73030042415089, "calibration/unique_confidence_per_question": 0.020833333333333332, "calibration/unique_confidences": 8.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.007118055555555581, "completions/max_length": 3618.6, "completions/max_terminated_length": 3618.6, "completions/mean_length": 1069.8447998046875, "completions/mean_terminated_length": 1077.4728759765626, "completions/min_length": 0.0, "completions/min_terminated_length": 257.0, "epoch": 1.2860576923076923, "grad_norm": 0.0005245500360615551, "learning_rate": 3.0348557692307694e-06, "loss": -0.007, "num_tokens": 1337072067.0, "reward": 1.3126560688018798, "reward_std": 0.10845615118741989, "rewards/accuracy_reward": 0.7636284708976746, "rewards/brier_reward": 0.8687873482704163, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9928819298744201, "rewards/mean_confidence_reward": 0.7105642557144165, "sampling/batch_mean_priority_error": 0.02997719025814488, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.33888888888888885, "sampling/error_ema_max": 0.23887455463409424, "sampling/error_ema_mean": 0.009147688373923301, "sampling/priority_kl": 0.030000006034970283, "sampling/priority_scale": 0.7223210751777515, "sampling/prob_entropy": 10.278949165344239, "sampling/prob_max": 4.911954893032089e-05, "sampling/prob_min": 1.9594168406911194e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.279200005531311, "sampling/prompt_draws_total": 38376.0, "sampling/seen_fraction": 0.7992733359336853, "sampling/unseen_fraction": 0.2007266640663147, "signal/accuracy_reward/centered_abs_mean": 0.11240776926279068, "signal/accuracy_reward/group_std_mean": 0.15260213911533355, "signal/accuracy_reward/group_zero_std_frac": 0.550000011920929, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05620388463139534, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05620388463139534, "signal/advantage_abs_mean": 0.07656532227993011, "signal/advantage_pre_scale_abs_mean": 0.07656532227993011, "signal/advantage_pre_scale_std": 0.16070781648159027, "signal/advantage_std": 0.16070781648159027, "signal/brier_reward/centered_abs_mean": 0.05885149613022804, "signal/brier_reward/group_std_mean": 0.08213269263505936, "signal/brier_reward/group_zero_std_frac": 0.2444444477558136, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02942574806511402, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.02942574806511402, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.012185329757630824, "signal/format_reward/group_std_mean": 0.023869803920388222, "signal/format_reward/group_zero_std_frac": 0.8972222328186035, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.006092664878815412, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.006092664878815412, "signal/mean_confidence_reward/centered_abs_mean": 0.05606608688831329, "signal/mean_confidence_reward/group_std_mean": 0.07428872585296631, "signal/mean_confidence_reward/group_zero_std_frac": 0.25277777910232546, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.606608397101809e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.606608397101809e-07, "step": 535 }, { "calibration/aurc": 0.10262722858718329, "calibration/batch_distribution_entropy": 0.6637057506834922, "calibration/batch_entropy_100bins": 0.3623222900478186, "calibration/batch_entropy_10bins": 0.6637057506834922, "calibration/batch_entropy_50bins": 0.4265199375187546, "calibration/batch_uniqueness": 0.5035236225549949, "calibration/confidence_entropy": 0.49673250121522416, "calibration/coverage@0%": 0.11943809834638816, "calibration/coverage@1%": 0.23612652306353352, "calibration/coverage@10%": 0.6410227572030598, "calibration/coverage@15%": 0.7449392062892216, "calibration/coverage@20%": 0.7798717563327379, "calibration/coverage@25%": 0.8375965370344922, "calibration/coverage@30%": 0.8838257523704822, "calibration/coverage@5%": 0.5576431880124593, "calibration/distribution_entropy_10": 0.6637057506834922, "calibration/distribution_entropy_100": 0.3623222900478186, "calibration/ece": 0.16562619669277623, "calibration/mean_confidence": 0.7031650834822043, "calibration/unique_confidence_per_question": 0.019791666666666666, "calibration/unique_confidences": 7.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006510416666666674, "completions/max_length": 3626.6, "completions/max_terminated_length": 3626.6, "completions/mean_length": 1010.912158203125, "completions/mean_terminated_length": 1017.4999145507812, "completions/min_length": 0.0, "completions/min_terminated_length": 252.2, "epoch": 1.2980769230769231, "grad_norm": 0.0005195676349103451, "learning_rate": 3.0048076923076923e-06, "loss": -0.006, "num_tokens": 1351813903.0, "reward": 1.3025019407272338, "reward_std": 0.10647796541452408, "rewards/accuracy_reward": 0.7448784708976746, "rewards/brier_reward": 0.8666215419769288, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9934895753860473, "rewards/mean_confidence_reward": 0.7113368153572083, "sampling/batch_mean_priority_error": 0.029256865188797367, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.2722222222222222, "sampling/error_ema_max": 0.23887455463409424, "sampling/error_ema_mean": 0.00917521920055151, "sampling/priority_kl": 0.03000005558133125, "sampling/priority_scale": 0.7232611239189282, "sampling/prob_entropy": 10.27895393371582, "sampling/prob_max": 4.928471753373742e-05, "sampling/prob_min": 1.963656031875871e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.2911999940872192, "sampling/prompt_draws_total": 38736.0, "sampling/seen_fraction": 0.802793312072754, "sampling/unseen_fraction": 0.1972066879272461, "signal/accuracy_reward/centered_abs_mean": 0.09930012971162797, "signal/accuracy_reward/group_std_mean": 0.14416081011295317, "signal/accuracy_reward/group_zero_std_frac": 0.5361111223697662, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04965006485581398, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04965006485581398, "signal/advantage_abs_mean": 0.07084471881389617, "signal/advantage_pre_scale_abs_mean": 0.07084471881389617, "signal/advantage_pre_scale_std": 0.1556703507900238, "signal/advantage_std": 0.1556703507900238, "signal/brier_reward/centered_abs_mean": 0.05297866091132164, "signal/brier_reward/group_std_mean": 0.07698716819286347, "signal/brier_reward/group_zero_std_frac": 0.20000000670552254, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02648933045566082, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.02648933045566082, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.009543185913935303, "signal/format_reward/group_std_mean": 0.019470962509512902, "signal/format_reward/group_zero_std_frac": 0.9138889074325561, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.004771592956967652, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.004771592956967652, "signal/mean_confidence_reward/centered_abs_mean": 0.05201877728104591, "signal/mean_confidence_reward/group_std_mean": 0.07003375068306923, "signal/mean_confidence_reward/group_zero_std_frac": 0.21388888955116273, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.201877797844645e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.201877797844645e-07, "step": 540 }, { "calibration/aurc": 0.10556109814611606, "calibration/batch_distribution_entropy": 0.600323191977656, "calibration/batch_entropy_100bins": 0.3348939794410301, "calibration/batch_entropy_10bins": 0.600323191977656, "calibration/batch_entropy_50bins": 0.39423177405879073, "calibration/batch_uniqueness": 0.4230486148288807, "calibration/confidence_entropy": 0.48495376273374, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.20783289817232378, "calibration/coverage@10%": 0.6653353459530027, "calibration/coverage@15%": 0.8357579960835511, "calibration/coverage@20%": 0.8942123585726719, "calibration/coverage@25%": 0.9166666666666667, "calibration/coverage@30%": 0.9479166666666667, "calibration/coverage@5%": 0.38077948215839863, "calibration/distribution_entropy_10": 0.600323191977656, "calibration/distribution_entropy_100": 0.3348939794410301, "calibration/ece": 0.11420474325500422, "calibration/mean_confidence": 0.7316595953002613, "calibration/unique_confidence_per_question": 0.020312499999999997, "calibration/unique_confidences": 7.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.003993055555555558, "completions/max_length": 3622.2, "completions/max_terminated_length": 3622.2, "completions/mean_length": 1049.6030517578124, "completions/mean_terminated_length": 1053.8172973632813, "completions/min_length": 0.0, "completions/min_terminated_length": 226.8, "epoch": 1.3100961538461537, "grad_norm": 0.00040970509871840477, "learning_rate": 2.974759615384616e-06, "loss": -0.0041, "num_tokens": 1367008978.0, "reward": 1.3004966974258423, "reward_std": 0.09596788734197617, "rewards/accuracy_reward": 0.7361979246139526, "rewards/brier_reward": 0.8687743067741394, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9960069417953491, "rewards/mean_confidence_reward": 0.7135416626930237, "sampling/batch_mean_priority_error": 0.02854209502281709, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.2972222222222222, "sampling/error_ema_max": 0.23887455463409424, "sampling/error_ema_mean": 0.009199992194771767, "sampling/priority_kl": 0.029999776929616927, "sampling/priority_scale": 0.7241226016776636, "sampling/prob_entropy": 10.278952407836915, "sampling/prob_max": 4.944601314491592e-05, "sampling/prob_min": 1.967924072232563e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.3032000064849854, "sampling/prompt_draws_total": 39096.0, "sampling/seen_fraction": 0.806166672706604, "sampling/unseen_fraction": 0.193833327293396, "signal/accuracy_reward/centered_abs_mean": 0.09425455927848816, "signal/accuracy_reward/group_std_mean": 0.1308629497885704, "signal/accuracy_reward/group_zero_std_frac": 0.5972222208976745, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04712727963924408, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04712727963924408, "signal/advantage_abs_mean": 0.06631011962890625, "signal/advantage_pre_scale_abs_mean": 0.06631011962890625, "signal/advantage_pre_scale_std": 0.1461026519536972, "signal/advantage_std": 0.1461026519536972, "signal/brier_reward/centered_abs_mean": 0.0521217867732048, "signal/brier_reward/group_std_mean": 0.07372160628437996, "signal/brier_reward/group_zero_std_frac": 0.22500000298023223, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0260608933866024, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.0260608933866024, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.007367621548473835, "signal/format_reward/group_std_mean": 0.016247615963220597, "signal/format_reward/group_zero_std_frac": 0.9250000238418579, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0036838107742369177, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0036838107742369177, "signal/mean_confidence_reward/centered_abs_mean": 0.0497748501598835, "signal/mean_confidence_reward/group_std_mean": 0.06694498136639596, "signal/mean_confidence_reward/group_zero_std_frac": 0.23055556118488313, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 4.977485048129893e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 4.977485048129893e-07, "step": 545 }, { "calibration/aurc": 0.1477792257131352, "calibration/batch_distribution_entropy": 0.5479882600039013, "calibration/batch_entropy_100bins": 0.31210863541520106, "calibration/batch_entropy_10bins": 0.5479882600039013, "calibration/batch_entropy_50bins": 0.3674092357353625, "calibration/batch_uniqueness": 0.2573770453378573, "calibration/confidence_entropy": 0.47178416347281205, "calibration/coverage@0%": 0.23201625218150088, "calibration/coverage@1%": 0.3627454188481675, "calibration/coverage@10%": 0.520556265545341, "calibration/coverage@15%": 0.5404515535034561, "calibration/coverage@20%": 0.6984934546385235, "calibration/coverage@25%": 0.748784629179206, "calibration/coverage@30%": 0.7916156719366232, "calibration/coverage@5%": 0.44763959887867416, "calibration/distribution_entropy_10": 0.5479882600039013, "calibration/distribution_entropy_100": 0.31210863541520106, "calibration/ece": 0.12668608975416018, "calibration/mean_confidence": 0.7079187279273338, "calibration/unique_confidence_per_question": 0.019791666666666666, "calibration/unique_confidences": 7.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.003993055555555558, "completions/max_length": 3873.4, "completions/max_terminated_length": 3873.4, "completions/mean_length": 1055.5867553710937, "completions/mean_terminated_length": 1059.9642700195313, "completions/min_length": 0.0, "completions/min_terminated_length": 294.4, "epoch": 1.3221153846153846, "grad_norm": 0.0004762877942994237, "learning_rate": 2.9447115384615386e-06, "loss": -0.0029, "num_tokens": 1382259161.0, "reward": 1.3024785041809082, "reward_std": 0.10325210392475129, "rewards/accuracy_reward": 0.7399305582046509, "rewards/brier_reward": 0.8690920114517212, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9959201455116272, "rewards/mean_confidence_reward": 0.7217013597488403, "sampling/batch_mean_priority_error": 0.03352335706747509, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.25, "sampling/error_ema_max": 0.23887455463409424, "sampling/error_ema_mean": 0.009228985756635666, "sampling/priority_kl": 0.03000006005167961, "sampling/priority_scale": 0.725033014989458, "sampling/prob_entropy": 10.278955268859864, "sampling/prob_max": 4.961000668117777e-05, "sampling/prob_min": 1.972165518964175e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.3152000188827515, "sampling/prompt_draws_total": 39456.0, "sampling/seen_fraction": 0.809500002861023, "sampling/unseen_fraction": 0.19049999713897706, "signal/accuracy_reward/centered_abs_mean": 0.10303819477558136, "signal/accuracy_reward/group_std_mean": 0.14597719609737397, "signal/accuracy_reward/group_zero_std_frac": 0.5388888895511628, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05151909738779068, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05151909738779068, "signal/advantage_abs_mean": 0.07086191922426224, "signal/advantage_pre_scale_abs_mean": 0.07086191922426224, "signal/advantage_pre_scale_std": 0.15113241821527482, "signal/advantage_std": 0.15113241821527482, "signal/brier_reward/centered_abs_mean": 0.059060697257518766, "signal/brier_reward/group_std_mean": 0.0823451228439808, "signal/brier_reward/group_zero_std_frac": 0.2472222253680229, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.029530348628759383, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.029530348628759383, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.007199435611255467, "signal/format_reward/group_std_mean": 0.01524608675390482, "signal/format_reward/group_zero_std_frac": 0.9305555582046509, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0035997178056277335, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0035997178056277335, "signal/mean_confidence_reward/centered_abs_mean": 0.0533691443502903, "signal/mean_confidence_reward/group_std_mean": 0.07072461247444153, "signal/mean_confidence_reward/group_zero_std_frac": 0.26111111640930174, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.336914341569354e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.336914341569354e-07, "step": 550 }, { "epoch": 1.3221153846153846, "eval_calibration/aurc": 0.13271904073131, "eval_calibration/batch_distribution_entropy": 0.5830219432889268, "eval_calibration/batch_entropy_100bins": 0.32198688591764624, "eval_calibration/batch_entropy_10bins": 0.5830219432889268, "eval_calibration/batch_entropy_50bins": 0.37903775239808685, "eval_calibration/batch_uniqueness": 0.2664253397778066, "eval_calibration/confidence_entropy": 0.44559934075697216, "eval_calibration/coverage@0%": 0.0, "eval_calibration/coverage@1%": 0.0, "eval_calibration/coverage@10%": 0.5839860748476936, "eval_calibration/coverage@15%": 0.720626631853786, "eval_calibration/coverage@20%": 0.7771975630983464, "eval_calibration/coverage@25%": 0.9155787641427328, "eval_calibration/coverage@30%": 1.0, "eval_calibration/coverage@5%": 0.0, "eval_calibration/distribution_entropy_10": 0.5830219432889268, "eval_calibration/distribution_entropy_100": 0.32198688591764624, "eval_calibration/ece": 0.05126196692776324, "eval_calibration/mean_confidence": 0.7456048738033073, "eval_calibration/unique_confidence_per_question": 0.0078125, "eval_calibration/unique_confidences": 9, "eval_completions/clipped_ratio": 0.002604166666666685, "eval_completions/max_length": 2827.1666666666665, "eval_completions/max_terminated_length": 2827.1666666666665, "eval_completions/mean_length": 1019.587168375651, "eval_completions/mean_terminated_length": 1022.301767985026, "eval_completions/min_length": 180.66666666666666, "eval_completions/min_terminated_length": 328.1666666666667, "eval_loss": 0.0, "eval_num_tokens": 1382259161.0, "eval_reward": 1.2812618215878804, "eval_reward_std": 0.3112395455439885, "eval_rewards/accuracy_reward": 0.7048611144224802, "eval_rewards/brier_reward": 0.8602517445882162, "eval_rewards/confidence_one_or_zero": 0.0, "eval_rewards/format_reward": 0.9973958432674408, "eval_rewards/mean_confidence_reward": 0.7436631619930267, "eval_runtime": 176.3071, "eval_samples_per_second": 5.672, "eval_signal/accuracy_reward/centered_abs_mean": 0.4022352397441864, "eval_signal/accuracy_reward/group_std_mean": 0.45433419446150464, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2011176198720932, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2011176198720932, "eval_signal/advantage_abs_mean": 0.2658406247695287, "eval_signal/advantage_pre_scale_abs_mean": 0.2658406247695287, "eval_signal/advantage_pre_scale_std": 0.30921146273612976, "eval_signal/advantage_std": 0.30921146273612976, "eval_signal/brier_reward/centered_abs_mean": 0.15962457656860352, "eval_signal/brier_reward/group_std_mean": 0.2166272054115931, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.07981228828430176, "eval_signal/brier_reward/weight": 0.5, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.07981228828430176, "eval_signal/confidence_one_or_zero/centered_abs_mean": 0.0, "eval_signal/confidence_one_or_zero/group_std_mean": 0.0, "eval_signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "eval_signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "eval_signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/centered_abs_mean": 0.0050455727614462376, "eval_signal/format_reward/group_std_mean": 0.014731391333043575, "eval_signal/format_reward/group_zero_std_frac": 0.9166666865348816, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0025227863807231188, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0025227863807231188, "eval_signal/mean_confidence_reward/centered_abs_mean": 0.18547633538643518, "eval_signal/mean_confidence_reward/group_std_mean": 0.22163266936937967, "eval_signal/mean_confidence_reward/group_zero_std_frac": 0.0, "eval_signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 1.8547631839282985e-06, "eval_signal/mean_confidence_reward/weight": 9.999999747378752e-06, "eval_signal/mean_confidence_reward/weighted_centered_abs_mean": 1.8547631839282985e-06, "eval_steps_per_second": 0.034, "step": 550 }, { "epoch": 1.3221153846153846, "step": 550, "train_probe_calibration/aurc": 0.10593902445540661, "train_probe_calibration/batch_distribution_entropy": 0.5521083451073077, "train_probe_calibration/batch_entropy_100bins": 0.3021480965124056, "train_probe_calibration/batch_entropy_10bins": 0.5521083451073077, "train_probe_calibration/batch_entropy_50bins": 0.3556838504991608, "train_probe_calibration/batch_uniqueness": 0.18839915502191362, "train_probe_calibration/confidence_entropy": 0.4368206661541801, "train_probe_calibration/coverage@0%": 0.0, "train_probe_calibration/coverage@1%": 0.0, "train_probe_calibration/coverage@10%": 0.6820557491289199, "train_probe_calibration/coverage@15%": 0.794425087108014, "train_probe_calibration/coverage@20%": 0.9242160278745645, "train_probe_calibration/coverage@25%": 1.0, "train_probe_calibration/coverage@30%": 1.0, "train_probe_calibration/coverage@5%": 0.0, "train_probe_calibration/distribution_entropy_10": 0.5521083451073077, "train_probe_calibration/distribution_entropy_100": 0.3021480965124056, "train_probe_calibration/ece": 0.02447735191637619, "train_probe_calibration/mean_confidence": 0.7603658536585367, "train_probe_calibration/unique_confidence_per_question": 0.0078125, "train_probe_calibration/unique_confidences": 9, "train_probe_completions/clipped_ratio": 0.0034722222222222467, "train_probe_completions/max_length": 2756.5, "train_probe_completions/max_terminated_length": 2756.5, "train_probe_completions/mean_length": 1045.1444905598958, "train_probe_completions/mean_terminated_length": 1048.7373758951824, "train_probe_completions/min_length": 75.0, "train_probe_completions/min_terminated_length": 256.5, "train_probe_loss": 0.0, "train_probe_num_tokens": 1382259161.0, "train_probe_reward": 1.3149598638216655, "train_probe_reward_std": 0.2965337634086609, "train_probe_rewards/accuracy_reward": 0.7630208333333334, "train_probe_rewards/brier_reward": 0.8703559041023254, "train_probe_rewards/confidence_one_or_zero": 0.0, "train_probe_rewards/format_reward": 0.9965277910232544, "train_probe_rewards/mean_confidence_reward": 0.7577256659666697, "train_probe_runtime": 189.9017, "train_probe_samples_per_second": 5.266, "train_probe_signal/accuracy_reward/centered_abs_mean": 0.3529188384612401, "train_probe_signal/accuracy_reward/group_std_mean": 0.4245219975709915, "train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0, "train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.17645941923062006, "train_probe_signal/accuracy_reward/weight": 0.5, "train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.17645941923062006, "train_probe_signal/advantage_abs_mean": 0.2381102442741394, "train_probe_signal/advantage_pre_scale_abs_mean": 0.2381102442741394, "train_probe_signal/advantage_pre_scale_std": 0.29500773052374524, "train_probe_signal/advantage_std": 0.29500773052374524, "train_probe_signal/brier_reward/centered_abs_mean": 0.1511371706922849, "train_probe_signal/brier_reward/group_std_mean": 0.21034086495637894, "train_probe_signal/brier_reward/group_zero_std_frac": 0.0, "train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.07556858534614246, "train_probe_signal/brier_reward/weight": 0.5, "train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.07556858534614246, "train_probe_signal/confidence_one_or_zero/centered_abs_mean": 0.0, "train_probe_signal/confidence_one_or_zero/group_std_mean": 0.0, "train_probe_signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "train_probe_signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "train_probe_signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "train_probe_signal/format_reward/centered_abs_mean": 0.006727430348594983, "train_probe_signal/format_reward/group_std_mean": 0.019641855110724766, "train_probe_signal/format_reward/group_zero_std_frac": 0.8888889153798422, "train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0033637151742974916, "train_probe_signal/format_reward/weight": 0.5, "train_probe_signal/format_reward/weighted_centered_abs_mean": 0.0033637151742974916, "train_probe_signal/mean_confidence_reward/centered_abs_mean": 0.17651907602945963, "train_probe_signal/mean_confidence_reward/group_std_mean": 0.21432253966728845, "train_probe_signal/mean_confidence_reward/group_zero_std_frac": 0.0, "train_probe_signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 1.7651907076772961e-06, "train_probe_signal/mean_confidence_reward/weight": 9.999999747378752e-06, "train_probe_signal/mean_confidence_reward/weighted_centered_abs_mean": 1.7651907076772961e-06, "train_probe_steps_per_second": 0.032 }, { "calibration/aurc": 0.11236809342863448, "calibration/batch_distribution_entropy": 0.4864726383348795, "calibration/batch_entropy_100bins": 0.265186801412563, "calibration/batch_entropy_10bins": 0.4864726383348795, "calibration/batch_entropy_50bins": 0.31217361193637044, "calibration/batch_uniqueness": 0.06888384482603635, "calibration/confidence_entropy": 0.4253938572160966, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.19322916666666667, "calibration/coverage@10%": 0.4472231288076588, "calibration/coverage@15%": 0.6732267188859878, "calibration/coverage@20%": 0.7201343559617058, "calibration/coverage@25%": 0.927412358572672, "calibration/coverage@30%": 0.953267188859878, "calibration/coverage@5%": 0.4102208442123586, "calibration/distribution_entropy_10": 0.4864726383348795, "calibration/distribution_entropy_100": 0.265186801412563, "calibration/ece": 0.11251528285465615, "calibration/mean_confidence": 0.7762872932985205, "calibration/unique_confidence_per_question": 0.020833333333333332, "calibration/unique_confidences": 8.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.003211805555555558, "completions/max_length": 3490.8, "completions/max_terminated_length": 3490.8, "completions/mean_length": 1027.4213012695313, "completions/mean_terminated_length": 1030.7630737304687, "completions/min_length": 0.0, "completions/min_terminated_length": 277.0, "epoch": 1.3341346153846154, "grad_norm": 0.0004955082549713552, "learning_rate": 2.9146634615384615e-06, "loss": -0.0026, "num_tokens": 1397194670.0, "reward": 1.3129889965057373, "reward_std": 0.10958507806062698, "rewards/accuracy_reward": 0.756250011920929, "rewards/brier_reward": 0.8729244709014893, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9967881917953492, "rewards/mean_confidence_reward": 0.7695572853088379, "sampling/batch_mean_priority_error": 0.043720418268419124, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.24999999999999994, "sampling/error_ema_max": 0.23887455463409424, "sampling/error_ema_mean": 0.009267618693411351, "sampling/priority_kl": 0.030000002682209016, "sampling/priority_scale": 0.7255671441322192, "sampling/prob_entropy": 10.278947830200195, "sampling/prob_max": 4.975697302143089e-05, "sampling/prob_min": 1.9709531261469238e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.3272000074386596, "sampling/prompt_draws_total": 39816.0, "sampling/seen_fraction": 0.8124399900436401, "sampling/unseen_fraction": 0.18756000995635985, "signal/accuracy_reward/centered_abs_mean": 0.10514322966337204, "signal/accuracy_reward/group_std_mean": 0.1475824624300003, "signal/accuracy_reward/group_zero_std_frac": 0.544444453716278, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05257161483168602, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05257161483168602, "signal/advantage_abs_mean": 0.07649606168270111, "signal/advantage_pre_scale_abs_mean": 0.07649606168270111, "signal/advantage_pre_scale_std": 0.16270316243171692, "signal/advantage_std": 0.16270316243171692, "signal/brier_reward/centered_abs_mean": 0.06445361748337745, "signal/brier_reward/group_std_mean": 0.09001707881689072, "signal/brier_reward/group_zero_std_frac": 0.33055556416511533, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03222680874168873, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03222680874168873, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.005517578113358468, "signal/format_reward/group_std_mean": 0.010547200590372086, "signal/format_reward/group_zero_std_frac": 0.9555555582046509, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.002758789056679234, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.002758789056679234, "signal/mean_confidence_reward/centered_abs_mean": 0.04958822429180145, "signal/mean_confidence_reward/group_std_mean": 0.06706940978765488, "signal/mean_confidence_reward/group_zero_std_frac": 0.37222222089767454, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 4.958821762102161e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 4.958821762102161e-07, "step": 555 }, { "calibration/aurc": 0.19349510104636097, "calibration/batch_distribution_entropy": 0.4805007810185836, "calibration/batch_entropy_100bins": 0.26217946661122016, "calibration/batch_entropy_10bins": 0.4805007810185836, "calibration/batch_entropy_50bins": 0.3086334260664991, "calibration/batch_uniqueness": 0.03672054455055672, "calibration/confidence_entropy": 0.4213402318788935, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.28261939730200175, "calibration/coverage@15%": 0.48259627937336813, "calibration/coverage@20%": 0.6195370704208987, "calibration/coverage@25%": 0.6242245704208986, "calibration/coverage@30%": 0.8592183505802906, "calibration/coverage@5%": 0.14375, "calibration/distribution_entropy_10": 0.4805007810185836, "calibration/distribution_entropy_100": 0.26217946661122016, "calibration/ece": 0.14547908293804312, "calibration/mean_confidence": 0.766256093108508, "calibration/unique_confidence_per_question": 0.020312499999999997, "calibration/unique_confidences": 7.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0022569444444444863, "completions/max_length": 3695.0, "completions/max_terminated_length": 3695.0, "completions/mean_length": 1026.3934814453125, "completions/mean_terminated_length": 1028.7162109375, "completions/min_length": 0.0, "completions/min_terminated_length": 262.4, "epoch": 1.3461538461538463, "grad_norm": 0.0004708245978690684, "learning_rate": 2.8846153846153845e-06, "loss": -0.0023, "num_tokens": 1412146755.0, "reward": 1.2862065076828002, "reward_std": 0.11520719677209854, "rewards/accuracy_reward": 0.7211805582046509, "rewards/brier_reward": 0.853473961353302, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9977430582046509, "rewards/mean_confidence_reward": 0.7711458206176758, "sampling/batch_mean_priority_error": 0.053999831678116283, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.30833333333333335, "sampling/error_ema_max": 0.23887455463409424, "sampling/error_ema_mean": 0.009317892044782639, "sampling/priority_kl": 0.030000148713588713, "sampling/priority_scale": 0.726933139632456, "sampling/prob_entropy": 10.27895278930664, "sampling/prob_max": 4.993655747966841e-05, "sampling/prob_min": 1.96603676158702e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.339199995994568, "sampling/prompt_draws_total": 40176.0, "sampling/seen_fraction": 0.8159599900245667, "sampling/unseen_fraction": 0.18404000997543335, "signal/accuracy_reward/centered_abs_mean": 0.11303168386220933, "signal/accuracy_reward/group_std_mean": 0.15187525302171706, "signal/accuracy_reward/group_zero_std_frac": 0.550000011920929, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05651584193110466, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05651584193110466, "signal/advantage_abs_mean": 0.08364091664552689, "signal/advantage_pre_scale_abs_mean": 0.08364091664552689, "signal/advantage_pre_scale_std": 0.17128926217556, "signal/advantage_std": 0.17128926217556, "signal/brier_reward/centered_abs_mean": 0.06988281607627869, "signal/brier_reward/group_std_mean": 0.09534413069486618, "signal/brier_reward/group_zero_std_frac": 0.3583333432674408, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.034941408038139346, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.034941408038139346, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.004210069356486201, "signal/format_reward/group_std_mean": 0.009841996990144253, "signal/format_reward/group_zero_std_frac": 0.9527777791023254, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0021050346782431006, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0021050346782431006, "signal/mean_confidence_reward/centered_abs_mean": 0.04588542580604553, "signal/mean_confidence_reward/group_std_mean": 0.06155051738023758, "signal/mean_confidence_reward/group_zero_std_frac": 0.4055555582046509, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 4.588542481087643e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 4.588542481087643e-07, "step": 560 }, { "calibration/aurc": 0.10364681460025409, "calibration/batch_distribution_entropy": 0.5636566513218076, "calibration/batch_entropy_100bins": 0.30608440987544927, "calibration/batch_entropy_10bins": 0.5636566513218076, "calibration/batch_entropy_50bins": 0.36031761490111924, "calibration/batch_uniqueness": 0.2434725418054239, "calibration/confidence_entropy": 0.45280722241919025, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.23514197127937336, "calibration/coverage@10%": 0.3917482593559617, "calibration/coverage@15%": 0.799688587902524, "calibration/coverage@20%": 0.9070604873803308, "calibration/coverage@25%": 0.9289817232375979, "calibration/coverage@30%": 0.9775456919060052, "calibration/coverage@5%": 0.3750380765883377, "calibration/distribution_entropy_10": 0.5636566513218076, "calibration/distribution_entropy_100": 0.30608440987544927, "calibration/ece": 0.10714221605744126, "calibration/mean_confidence": 0.745126060704961, "calibration/unique_confidence_per_question": 0.019791666666666666, "calibration/unique_confidences": 7.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0024305555555555356, "completions/max_length": 3635.8, "completions/max_terminated_length": 3635.8, "completions/mean_length": 1036.5881958007812, "completions/mean_terminated_length": 1039.1330078125, "completions/min_length": 0.0, "completions/min_terminated_length": 266.4, "epoch": 1.3581730769230769, "grad_norm": 0.000573323224671185, "learning_rate": 2.8545673076923082e-06, "loss": -0.002, "num_tokens": 1427154971.0, "reward": 1.3023345947265625, "reward_std": 0.10061877071857453, "rewards/accuracy_reward": 0.7288194417953491, "rewards/brier_reward": 0.878265643119812, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.997569453716278, "rewards/mean_confidence_reward": 0.7324305295944213, "sampling/batch_mean_priority_error": 0.032454187780661635, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.29166666666666663, "sampling/error_ema_max": 0.23887455463409424, "sampling/error_ema_mean": 0.009358871728181839, "sampling/priority_kl": 0.030000001564621924, "sampling/priority_scale": 0.7282448708778247, "sampling/prob_entropy": 10.278950500488282, "sampling/prob_max": 5.011486864532344e-05, "sampling/prob_min": 1.9697412426467054e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.351199984550476, "sampling/prompt_draws_total": 40536.0, "sampling/seen_fraction": 0.8193533301353455, "sampling/unseen_fraction": 0.18064666986465455, "signal/accuracy_reward/centered_abs_mean": 0.10447048544883727, "signal/accuracy_reward/group_std_mean": 0.14341845363378525, "signal/accuracy_reward/group_zero_std_frac": 0.569444453716278, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05223524272441864, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05223524272441864, "signal/advantage_abs_mean": 0.0709734357893467, "signal/advantage_pre_scale_abs_mean": 0.0709734357893467, "signal/advantage_pre_scale_std": 0.15031934678554534, "signal/advantage_std": 0.15031934678554534, "signal/brier_reward/centered_abs_mean": 0.05996907502412796, "signal/brier_reward/group_std_mean": 0.08309411108493805, "signal/brier_reward/group_zero_std_frac": 0.31944444477558137, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02998453751206398, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.02998453751206398, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.0044053818914107975, "signal/format_reward/group_std_mean": 0.00898937825113535, "signal/format_reward/group_zero_std_frac": 0.9611111164093018, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0022026909457053987, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0022026909457053987, "signal/mean_confidence_reward/centered_abs_mean": 0.05105469524860382, "signal/mean_confidence_reward/group_std_mean": 0.06670601516962052, "signal/mean_confidence_reward/group_zero_std_frac": 0.35000001192092894, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.105469369937054e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.105469369937054e-07, "step": 565 }, { "calibration/aurc": 0.11637660210021865, "calibration/batch_distribution_entropy": 0.6026897912239121, "calibration/batch_entropy_100bins": 0.33899787993418273, "calibration/batch_entropy_10bins": 0.6026897912239121, "calibration/batch_entropy_50bins": 0.39906281931877646, "calibration/batch_uniqueness": 0.3477195535831427, "calibration/confidence_entropy": 0.45724440003584677, "calibration/coverage@0%": 0.12864583333333332, "calibration/coverage@1%": 0.12864583333333332, "calibration/coverage@10%": 0.5159020390070922, "calibration/coverage@15%": 0.6976897116826843, "calibration/coverage@20%": 0.8466561464177916, "calibration/coverage@25%": 0.9232823581560284, "calibration/coverage@30%": 0.9609375, "calibration/coverage@5%": 0.2828125, "calibration/distribution_entropy_10": 0.6026897912239121, "calibration/distribution_entropy_100": 0.33899787993418273, "calibration/ece": 0.110054493500361, "calibration/mean_confidence": 0.7014624187545138, "calibration/unique_confidence_per_question": 0.021875, "calibration/unique_confidences": 8.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.003211805555555558, "completions/max_length": 3079.2, "completions/max_terminated_length": 3079.2, "completions/mean_length": 1003.11484375, "completions/mean_terminated_length": 1006.3870239257812, "completions/min_length": 0.0, "completions/min_terminated_length": 232.4, "epoch": 1.3701923076923077, "grad_norm": 0.0005236753495410085, "learning_rate": 2.8245192307692307e-06, "loss": -0.0025, "num_tokens": 1441808870.0, "reward": 1.3041511535644532, "reward_std": 0.09854458272457123, "rewards/accuracy_reward": 0.7466145753860474, "rewards/brier_reward": 0.8648854255676269, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9967881917953492, "rewards/mean_confidence_reward": 0.7043923616409302, "sampling/batch_mean_priority_error": 0.039206801273562374, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.28888888888888886, "sampling/error_ema_max": 0.23887455463409424, "sampling/error_ema_mean": 0.009391096048057079, "sampling/priority_kl": 0.0300001073628664, "sampling/priority_scale": 0.7299370229942724, "sampling/prob_entropy": 10.27895393371582, "sampling/prob_max": 5.030815664213151e-05, "sampling/prob_min": 1.973052021639887e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.3631999969482422, "sampling/prompt_draws_total": 40896.0, "sampling/seen_fraction": 0.8229599952697754, "sampling/unseen_fraction": 0.1770400047302246, "signal/accuracy_reward/centered_abs_mean": 0.10590820461511612, "signal/accuracy_reward/group_std_mean": 0.14967113137245178, "signal/accuracy_reward/group_zero_std_frac": 0.5305555760860443, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05295410230755806, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05295410230755806, "signal/advantage_abs_mean": 0.06743100956082344, "signal/advantage_pre_scale_abs_mean": 0.06743100956082344, "signal/advantage_pre_scale_std": 0.14558007717132568, "signal/advantage_std": 0.14558007717132568, "signal/brier_reward/centered_abs_mean": 0.059187816083431245, "signal/brier_reward/group_std_mean": 0.08274016678333282, "signal/brier_reward/group_zero_std_frac": 0.2583333343267441, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.029593908041715623, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.029593908041715623, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.005626085097901523, "signal/format_reward/group_std_mean": 0.011389839416369795, "signal/format_reward/group_zero_std_frac": 0.950000011920929, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0028130425489507616, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0028130425489507616, "signal/mean_confidence_reward/centered_abs_mean": 0.05424913391470909, "signal/mean_confidence_reward/group_std_mean": 0.071955007314682, "signal/mean_confidence_reward/group_zero_std_frac": 0.27777778208255766, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.424913524620934e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.424913524620934e-07, "step": 570 }, { "calibration/aurc": 0.16748827581278503, "calibration/batch_distribution_entropy": 0.633057807141147, "calibration/batch_entropy_100bins": 0.3477108957075576, "calibration/batch_entropy_10bins": 0.633057807141147, "calibration/batch_entropy_50bins": 0.40931964051177916, "calibration/batch_uniqueness": 0.42872305796571275, "calibration/confidence_entropy": 0.47054753129181115, "calibration/coverage@0%": 0.08511749347258486, "calibration/coverage@1%": 0.08511749347258486, "calibration/coverage@10%": 0.34360313315926894, "calibration/coverage@15%": 0.571150199257936, "calibration/coverage@20%": 0.6690105812834959, "calibration/coverage@25%": 0.8905162311630107, "calibration/coverage@30%": 0.9324671230717569, "calibration/coverage@5%": 0.08511749347258486, "calibration/distribution_entropy_10": 0.633057807141147, "calibration/distribution_entropy_100": 0.3477108957075576, "calibration/ece": 0.12355976349458997, "calibration/mean_confidence": 0.7184623108726773, "calibration/unique_confidence_per_question": 0.020833333333333332, "calibration/unique_confidences": 8.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.001996527777777768, "completions/max_length": 3514.8, "completions/max_terminated_length": 3514.8, "completions/mean_length": 1018.8572143554687, "completions/mean_terminated_length": 1020.90146484375, "completions/min_length": 0.0, "completions/min_terminated_length": 272.2, "epoch": 1.3822115384615383, "grad_norm": 0.0005015527713112533, "learning_rate": 2.7944711538461537e-06, "loss": -0.0021, "num_tokens": 1456619897.0, "reward": 1.3007010221481323, "reward_std": 0.0904072642326355, "rewards/accuracy_reward": 0.7381076335906982, "rewards/brier_reward": 0.8652769207954407, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9980034708976746, "rewards/mean_confidence_reward": 0.7014843702316285, "sampling/batch_mean_priority_error": 0.03784606769545894, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.24444444444444446, "sampling/error_ema_max": 0.23887455463409424, "sampling/error_ema_mean": 0.009430542774498462, "sampling/priority_kl": 0.029999880492687224, "sampling/priority_scale": 0.7312095224158839, "sampling/prob_entropy": 10.278951454162598, "sampling/prob_max": 5.048633538535796e-05, "sampling/prob_min": 1.9768124184338375e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.3751999855041503, "sampling/prompt_draws_total": 41256.0, "sampling/seen_fraction": 0.8261666774749756, "sampling/unseen_fraction": 0.17383332252502443, "signal/accuracy_reward/centered_abs_mean": 0.10423719584941864, "signal/accuracy_reward/group_std_mean": 0.13614998161792755, "signal/accuracy_reward/group_zero_std_frac": 0.6166666865348815, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05211859792470932, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05211859792470932, "signal/advantage_abs_mean": 0.066029342263937, "signal/advantage_pre_scale_abs_mean": 0.066029342263937, "signal/advantage_pre_scale_std": 0.13949486911296843, "signal/advantage_std": 0.13949486911296843, "signal/brier_reward/centered_abs_mean": 0.0614675983786583, "signal/brier_reward/group_std_mean": 0.08250565230846404, "signal/brier_reward/group_zero_std_frac": 0.26944445073604584, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03073379918932915, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03073379918932915, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.003792317700572312, "signal/format_reward/group_std_mean": 0.009447787888348103, "signal/format_reward/group_zero_std_frac": 0.9527777791023254, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.001896158850286156, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.001896158850286156, "signal/mean_confidence_reward/centered_abs_mean": 0.05299696773290634, "signal/mean_confidence_reward/group_std_mean": 0.06921513825654983, "signal/mean_confidence_reward/group_zero_std_frac": 0.2833333432674408, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.299696567817592e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.299696567817592e-07, "step": 575 }, { "calibration/aurc": 0.09452517261125645, "calibration/batch_distribution_entropy": 0.5651422000292813, "calibration/batch_entropy_100bins": 0.3085382199945976, "calibration/batch_entropy_10bins": 0.5651422000292813, "calibration/batch_entropy_50bins": 0.36320620047106555, "calibration/batch_uniqueness": 0.25278723390567037, "calibration/confidence_entropy": 0.44761963367691926, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.13177083333333334, "calibration/coverage@10%": 0.7177170583115753, "calibration/coverage@15%": 0.740958724978242, "calibration/coverage@20%": 0.8317534268929505, "calibration/coverage@25%": 0.8920107158398608, "calibration/coverage@30%": 0.9472584856396867, "calibration/coverage@5%": 0.6343128916449087, "calibration/distribution_entropy_10": 0.5651422000292813, "calibration/distribution_entropy_100": 0.3085382199945976, "calibration/ece": 0.11731548411662304, "calibration/mean_confidence": 0.7440300815926894, "calibration/unique_confidence_per_question": 0.021354166666666667, "calibration/unique_confidences": 8.2, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.003125, "completions/max_length": 3870.0, "completions/max_terminated_length": 3870.0, "completions/mean_length": 1048.67880859375, "completions/mean_terminated_length": 1051.9510986328125, "completions/min_length": 0.0, "completions/min_terminated_length": 249.2, "epoch": 1.3942307692307692, "grad_norm": 0.0004841455665882677, "learning_rate": 2.7644230769230775e-06, "loss": -0.0025, "num_tokens": 1471801285.0, "reward": 1.2834061145782472, "reward_std": 0.09844976812601089, "rewards/accuracy_reward": 0.7129340171813965, "rewards/brier_reward": 0.8569887042045593, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.996874988079071, "rewards/mean_confidence_reward": 0.7255121350288392, "sampling/batch_mean_priority_error": 0.04486913942545373, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.2111111111111111, "sampling/error_ema_max": 0.23887455463409424, "sampling/error_ema_mean": 0.009472385980188847, "sampling/priority_kl": 0.030000155419111253, "sampling/priority_scale": 0.7318387806648389, "sampling/prob_entropy": 10.278948211669922, "sampling/prob_max": 5.0635512889130044e-05, "sampling/prob_min": 1.9810545927612112e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.3871999979019165, "sampling/prompt_draws_total": 41616.0, "sampling/seen_fraction": 0.8288466572761536, "sampling/unseen_fraction": 0.17115334272384644, "signal/accuracy_reward/centered_abs_mean": 0.10468207597732544, "signal/accuracy_reward/group_std_mean": 0.14231548309326172, "signal/accuracy_reward/group_zero_std_frac": 0.5777777791023254, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05234103798866272, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05234103798866272, "signal/advantage_abs_mean": 0.06959071755409241, "signal/advantage_pre_scale_abs_mean": 0.06959071755409241, "signal/advantage_pre_scale_std": 0.1507834017276764, "signal/advantage_std": 0.1507834017276764, "signal/brier_reward/centered_abs_mean": 0.06135156080126762, "signal/brier_reward/group_std_mean": 0.08509978950023651, "signal/brier_reward/group_zero_std_frac": 0.34166666865348816, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03067578040063381, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03067578040063381, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.005696614552289247, "signal/format_reward/group_std_mean": 0.011707442020997405, "signal/format_reward/group_zero_std_frac": 0.95, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0028483072761446237, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0028483072761446237, "signal/mean_confidence_reward/centered_abs_mean": 0.05245281159877777, "signal/mean_confidence_reward/group_std_mean": 0.0707058347761631, "signal/mean_confidence_reward/group_zero_std_frac": 0.36111111640930177, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.245280874532909e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.245280874532909e-07, "step": 580 }, { "calibration/aurc": 0.13932227451395895, "calibration/batch_distribution_entropy": 0.6448697784807116, "calibration/batch_entropy_100bins": 0.36058733309465746, "calibration/batch_entropy_10bins": 0.6448697784807116, "calibration/batch_entropy_50bins": 0.4244775742648617, "calibration/batch_uniqueness": 0.42685474793608336, "calibration/confidence_entropy": 0.4686404208047999, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.46281258641283707, "calibration/coverage@15%": 0.6135447063646275, "calibration/coverage@20%": 0.7540883443113853, "calibration/coverage@25%": 0.8325451428643802, "calibration/coverage@30%": 0.9068541276502673, "calibration/coverage@5%": 0.2557474818265518, "calibration/distribution_entropy_10": 0.6448697784807116, "calibration/distribution_entropy_100": 0.36058733309465746, "calibration/ece": 0.09226729368323519, "calibration/mean_confidence": 0.6993229161116602, "calibration/unique_confidence_per_question": 0.021875, "calibration/unique_confidences": 8.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.005468749999999978, "completions/max_length": 3880.0, "completions/max_terminated_length": 3880.0, "completions/mean_length": 1112.0519287109375, "completions/mean_terminated_length": 1118.2107421875, "completions/min_length": 0.0, "completions/min_terminated_length": 318.4, "epoch": 1.40625, "grad_norm": 0.000712965673301369, "learning_rate": 2.7343750000000004e-06, "loss": -0.0057, "num_tokens": 1487703483.0, "reward": 1.2871733903884888, "reward_std": 0.11923541873693466, "rewards/accuracy_reward": 0.7236979126930236, "rewards/brier_reward": 0.8561024308204651, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9945312380790711, "rewards/mean_confidence_reward": 0.760451364517212, "sampling/batch_mean_priority_error": 0.05240654750019175, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.2583333333333334, "sampling/error_ema_max": 0.23887455463409424, "sampling/error_ema_mean": 0.009516830556094646, "sampling/priority_kl": 0.03000010997056961, "sampling/priority_scale": 0.7327525675529614, "sampling/prob_entropy": 10.278953361511231, "sampling/prob_max": 5.079586335341446e-05, "sampling/prob_min": 1.9756658366532066e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.3992000102996827, "sampling/prompt_draws_total": 41976.0, "sampling/seen_fraction": 0.8316733241081238, "sampling/unseen_fraction": 0.16832667589187622, "signal/accuracy_reward/centered_abs_mean": 0.11232096254825592, "signal/accuracy_reward/group_std_mean": 0.1508895754814148, "signal/accuracy_reward/group_zero_std_frac": 0.5611111164093018, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05616048127412796, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05616048127412796, "signal/advantage_abs_mean": 0.08523141592741013, "signal/advantage_pre_scale_abs_mean": 0.08523141592741013, "signal/advantage_pre_scale_std": 0.1773594230413437, "signal/advantage_std": 0.1773594230413437, "signal/brier_reward/centered_abs_mean": 0.07225770503282547, "signal/brier_reward/group_std_mean": 0.09987465739250183, "signal/brier_reward/group_zero_std_frac": 0.36111111640930177, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.036128852516412735, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.036128852516412735, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.009673394076526166, "signal/format_reward/group_std_mean": 0.01826365813612938, "signal/format_reward/group_zero_std_frac": 0.9277778029441833, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.004836697038263083, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.004836697038263083, "signal/mean_confidence_reward/centered_abs_mean": 0.053898122161626816, "signal/mean_confidence_reward/group_std_mean": 0.07314420342445374, "signal/mean_confidence_reward/group_zero_std_frac": 0.4138888895511627, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.389812258727034e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.389812258727034e-07, "step": 585 }, { "calibration/aurc": 0.13942999390042368, "calibration/batch_distribution_entropy": 0.5017374369243515, "calibration/batch_entropy_100bins": 0.2704270845744181, "calibration/batch_entropy_10bins": 0.5017374369243515, "calibration/batch_entropy_50bins": 0.3183423884874351, "calibration/batch_uniqueness": 0.120202369989494, "calibration/confidence_entropy": 0.4277014706590796, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.13333333333333333, "calibration/coverage@10%": 0.325, "calibration/coverage@15%": 0.6423574847693646, "calibration/coverage@20%": 0.7034336923411663, "calibration/coverage@25%": 0.8751958224543082, "calibration/coverage@30%": 0.9394255874673629, "calibration/coverage@5%": 0.2786458333333333, "calibration/distribution_entropy_10": 0.5017374369243515, "calibration/distribution_entropy_100": 0.2704270845744181, "calibration/ece": 0.1212374891209748, "calibration/mean_confidence": 0.7874815056570933, "calibration/unique_confidence_per_question": 0.01927083333333333, "calibration/unique_confidences": 7.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.003038194444444464, "completions/max_length": 3645.8, "completions/max_terminated_length": 3645.8, "completions/mean_length": 1017.2987915039063, "completions/mean_terminated_length": 1020.4353637695312, "completions/min_length": 0.0, "completions/min_terminated_length": 272.4, "epoch": 1.4182692307692308, "grad_norm": 0.00043438063585199416, "learning_rate": 2.7043269230769233e-06, "loss": -0.0041, "num_tokens": 1502516589.0, "reward": 1.3097166061401366, "reward_std": 0.0976711392402649, "rewards/accuracy_reward": 0.7488715171813964, "rewards/brier_reward": 0.8735842347145081, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9969618082046509, "rewards/mean_confidence_reward": 0.7808419942855835, "sampling/batch_mean_priority_error": 0.044314487949195534, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.2555555555555556, "sampling/error_ema_max": 0.23887455463409424, "sampling/error_ema_mean": 0.009564095363020896, "sampling/priority_kl": 0.02999996468424797, "sampling/priority_scale": 0.7341858922736719, "sampling/prob_entropy": 10.27895221710205, "sampling/prob_max": 5.097756948089227e-05, "sampling/prob_min": 1.9790743317571467e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.4111999988555908, "sampling/prompt_draws_total": 42336.0, "sampling/seen_fraction": 0.8347866654396057, "sampling/unseen_fraction": 0.16521333456039428, "signal/accuracy_reward/centered_abs_mean": 0.090966796875, "signal/accuracy_reward/group_std_mean": 0.12698046565055848, "signal/accuracy_reward/group_zero_std_frac": 0.6083333373069764, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0454833984375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0454833984375, "signal/advantage_abs_mean": 0.06731701195240021, "signal/advantage_pre_scale_abs_mean": 0.06731701195240021, "signal/advantage_pre_scale_std": 0.1525650978088379, "signal/advantage_std": 0.1525650978088379, "signal/brier_reward/centered_abs_mean": 0.055035379528999326, "signal/brier_reward/group_std_mean": 0.07981750667095185, "signal/brier_reward/group_zero_std_frac": 0.37777777910232546, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.027517689764499663, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.027517689764499663, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.0055501303169876335, "signal/format_reward/group_std_mean": 0.01122641284018755, "signal/format_reward/group_zero_std_frac": 0.9527777910232544, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0027750651584938168, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0027750651584938168, "signal/mean_confidence_reward/centered_abs_mean": 0.0433946542441845, "signal/mean_confidence_reward/group_std_mean": 0.0584526002407074, "signal/mean_confidence_reward/group_zero_std_frac": 0.4250000059604645, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 4.339465363045747e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 4.339465363045747e-07, "step": 590 }, { "calibration/aurc": 0.11260714434102297, "calibration/batch_distribution_entropy": 0.5449134079570979, "calibration/batch_entropy_100bins": 0.29232000449749385, "calibration/batch_entropy_10bins": 0.5449134079570979, "calibration/batch_entropy_50bins": 0.34411437959640323, "calibration/batch_uniqueness": 0.21047108118652816, "calibration/confidence_entropy": 0.43431141306142684, "calibration/coverage@0%": 0.1484375, "calibration/coverage@1%": 0.1484375, "calibration/coverage@10%": 0.6736571618037136, "calibration/coverage@15%": 0.7293200154730327, "calibration/coverage@20%": 0.7685952696728559, "calibration/coverage@25%": 0.8, "calibration/coverage@30%": 0.8, "calibration/coverage@5%": 0.428125, "calibration/distribution_entropy_10": 0.5449134079570979, "calibration/distribution_entropy_100": 0.29232000449749385, "calibration/ece": 0.137520032051282, "calibration/mean_confidence": 0.7773144617595051, "calibration/unique_confidence_per_question": 0.020833333333333332, "calibration/unique_confidences": 8.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006423611111111139, "completions/max_length": 3772.2, "completions/max_terminated_length": 3772.2, "completions/mean_length": 1045.2372680664062, "completions/mean_terminated_length": 1052.1338256835938, "completions/min_length": 0.0, "completions/min_terminated_length": 278.6, "epoch": 1.4302884615384617, "grad_norm": 0.0004948692512698472, "learning_rate": 2.6742788461538467e-06, "loss": -0.0076, "num_tokens": 1517658618.0, "reward": 1.3219900369644164, "reward_std": 0.09807792603969574, "rewards/accuracy_reward": 0.7714409708976746, "rewards/brier_reward": 0.8789479613304139, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9935763716697693, "rewards/mean_confidence_reward": 0.7427256941795349, "sampling/batch_mean_priority_error": 0.03236808832125963, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.2555555555555556, "sampling/error_ema_max": 0.23887455463409424, "sampling/error_ema_mean": 0.009601323492825031, "sampling/priority_kl": 0.0300000436604023, "sampling/priority_scale": 0.7354452908271923, "sampling/prob_entropy": 10.278949165344239, "sampling/prob_max": 5.1152726518921555e-05, "sampling/prob_min": 1.98265362996608e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.423200011253357, "sampling/prompt_draws_total": 42696.0, "sampling/seen_fraction": 0.8377133131027221, "sampling/unseen_fraction": 0.16228668689727782, "signal/accuracy_reward/centered_abs_mean": 0.09402669221162796, "signal/accuracy_reward/group_std_mean": 0.12942231297492982, "signal/accuracy_reward/group_zero_std_frac": 0.6027777910232544, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04701334610581398, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04701334610581398, "signal/advantage_abs_mean": 0.06896353140473366, "signal/advantage_pre_scale_abs_mean": 0.06896353140473366, "signal/advantage_pre_scale_std": 0.15308170765638351, "signal/advantage_std": 0.15308170765638351, "signal/brier_reward/centered_abs_mean": 0.055911561101675035, "signal/brier_reward/group_std_mean": 0.07856666445732116, "signal/brier_reward/group_zero_std_frac": 0.3083333343267441, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.027955780550837517, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.027955780550837517, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.01025390620343387, "signal/format_reward/group_std_mean": 0.017292334325611593, "signal/format_reward/group_zero_std_frac": 0.9333333253860474, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.005126953101716935, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.005126953101716935, "signal/mean_confidence_reward/centered_abs_mean": 0.046098099648952486, "signal/mean_confidence_reward/group_std_mean": 0.06332936584949493, "signal/mean_confidence_reward/group_zero_std_frac": 0.33333333134651183, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 4.6098096504465504e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 4.6098096504465504e-07, "step": 595 }, { "calibration/aurc": 0.10463935890886092, "calibration/batch_distribution_entropy": 0.6619123417933046, "calibration/batch_entropy_100bins": 0.36687692710750464, "calibration/batch_entropy_10bins": 0.6619123417933046, "calibration/batch_entropy_50bins": 0.43188158257200693, "calibration/batch_uniqueness": 0.4892413688617815, "calibration/confidence_entropy": 0.4763627433511208, "calibration/coverage@0%": 0.11780104712041886, "calibration/coverage@1%": 0.11780104712041886, "calibration/coverage@10%": 0.5687064055814526, "calibration/coverage@15%": 0.7911625669918922, "calibration/coverage@20%": 0.8315940634877009, "calibration/coverage@25%": 0.9298859420090697, "calibration/coverage@30%": 0.954569190600522, "calibration/coverage@5%": 0.3743518721036731, "calibration/distribution_entropy_10": 0.6619123417933046, "calibration/distribution_entropy_100": 0.36687692710750464, "calibration/ece": 0.15183055306638016, "calibration/mean_confidence": 0.7065397890530327, "calibration/unique_confidence_per_question": 0.021354166666666667, "calibration/unique_confidences": 8.2, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.003993055555555558, "completions/max_length": 3725.4, "completions/max_terminated_length": 3725.4, "completions/mean_length": 1078.1233642578125, "completions/mean_terminated_length": 1082.4756103515624, "completions/min_length": 0.0, "completions/min_terminated_length": 256.6, "epoch": 1.4423076923076923, "grad_norm": 0.0004698413540609181, "learning_rate": 2.6442307692307696e-06, "loss": -0.0045, "num_tokens": 1533203047.0, "reward": 1.3211289644241333, "reward_std": 0.0979426309466362, "rewards/accuracy_reward": 0.7806423664093017, "rewards/brier_reward": 0.8656814217567443, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9959201335906982, "rewards/mean_confidence_reward": 0.7000954866409301, "sampling/batch_mean_priority_error": 0.032977938594612934, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.2833333333333333, "sampling/error_ema_max": 0.23887455463409424, "sampling/error_ema_mean": 0.009629619680345059, "sampling/priority_kl": 0.030000071227550506, "sampling/priority_scale": 0.7374177158577367, "sampling/prob_entropy": 10.278955268859864, "sampling/prob_max": 5.13555969519075e-05, "sampling/prob_min": 1.9854630227200688e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.435200023651123, "sampling/prompt_draws_total": 43056.0, "sampling/seen_fraction": 0.8410533308982849, "sampling/unseen_fraction": 0.1589466691017151, "signal/accuracy_reward/centered_abs_mean": 0.10369466096162797, "signal/accuracy_reward/group_std_mean": 0.14326806515455245, "signal/accuracy_reward/group_zero_std_frac": 0.5694444656372071, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05184733048081398, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05184733048081398, "signal/advantage_abs_mean": 0.06741442307829856, "signal/advantage_pre_scale_abs_mean": 0.06741442307829856, "signal/advantage_pre_scale_std": 0.14682429730892183, "signal/advantage_std": 0.14682429730892183, "signal/brier_reward/centered_abs_mean": 0.05674092769622803, "signal/brier_reward/group_std_mean": 0.07852299660444259, "signal/brier_reward/group_zero_std_frac": 0.22500000298023223, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.028370463848114015, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.028370463848114015, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.0074707032181322575, "signal/format_reward/group_std_mean": 0.016295704618096353, "signal/format_reward/group_zero_std_frac": 0.9250000238418579, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0037353516090661287, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0037353516090661287, "signal/mean_confidence_reward/centered_abs_mean": 0.051773006469011305, "signal/mean_confidence_reward/group_std_mean": 0.06856385469436646, "signal/mean_confidence_reward/group_zero_std_frac": 0.24166667461395264, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.177300749892311e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.177300749892311e-07, "step": 600 }, { "epoch": 1.4423076923076923, "eval_calibration/aurc": 0.11749798048665533, "eval_calibration/batch_distribution_entropy": 0.7077334731137986, "eval_calibration/batch_entropy_100bins": 0.3875815454761296, "eval_calibration/batch_entropy_10bins": 0.7077334731137986, "eval_calibration/batch_entropy_50bins": 0.4562547243176337, "eval_calibration/batch_uniqueness": 0.5498560286798497, "eval_calibration/confidence_entropy": 0.4879134814798048, "eval_calibration/coverage@0%": 0.0, "eval_calibration/coverage@1%": 0.0, "eval_calibration/coverage@10%": 0.5772925764192139, "eval_calibration/coverage@15%": 0.7458515283842795, "eval_calibration/coverage@20%": 0.8148471615720524, "eval_calibration/coverage@25%": 0.9065502183406113, "eval_calibration/coverage@30%": 1.0, "eval_calibration/coverage@5%": 0.0, "eval_calibration/distribution_entropy_10": 0.7077334731137986, "eval_calibration/distribution_entropy_100": 0.3875815454761296, "eval_calibration/ece": 0.03554585152838413, "eval_calibration/mean_confidence": 0.7082096069868996, "eval_calibration/unique_confidence_per_question": 0.0078125, "eval_calibration/unique_confidences": 9, "eval_completions/clipped_ratio": 0.006076388888888895, "eval_completions/max_length": 3122.0, "eval_completions/max_terminated_length": 3122.0, "eval_completions/mean_length": 1089.3387451171875, "eval_completions/mean_terminated_length": 1096.1107584635417, "eval_completions/min_length": 155.83333333333334, "eval_completions/min_terminated_length": 358.8333333333333, "eval_loss": 0.0, "eval_num_tokens": 1533203047.0, "eval_reward": 1.2864523927370708, "eval_reward_std": 0.2984392096598943, "eval_rewards/accuracy_reward": 0.7230902910232544, "eval_rewards/brier_reward": 0.8558767636617025, "eval_rewards/confidence_one_or_zero": 0.0, "eval_rewards/format_reward": 0.9939236144224802, "eval_rewards/mean_confidence_reward": 0.7039062678813934, "eval_runtime": 191.414, "eval_samples_per_second": 5.224, "eval_signal/accuracy_reward/centered_abs_mean": 0.3887261301279068, "eval_signal/accuracy_reward/group_std_mean": 0.44594499965508777, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1943630650639534, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.1943630650639534, "eval_signal/advantage_abs_mean": 0.24910660833120346, "eval_signal/advantage_pre_scale_abs_mean": 0.24910660833120346, "eval_signal/advantage_pre_scale_std": 0.29744037985801697, "eval_signal/advantage_std": 0.29744037985801697, "eval_signal/brier_reward/centered_abs_mean": 0.1458252047499021, "eval_signal/brier_reward/group_std_mean": 0.19646542519330978, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.07291260237495105, "eval_signal/brier_reward/weight": 0.5, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.07291260237495105, "eval_signal/confidence_one_or_zero/centered_abs_mean": 0.0, "eval_signal/confidence_one_or_zero/group_std_mean": 0.0, "eval_signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "eval_signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "eval_signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/centered_abs_mean": 0.011664496424297491, "eval_signal/format_reward/group_std_mean": 0.031383837262789406, "eval_signal/format_reward/group_zero_std_frac": 0.8333333532015482, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0058322482121487456, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0058322482121487456, "eval_signal/mean_confidence_reward/centered_abs_mean": 0.18724499146143594, "eval_signal/mean_confidence_reward/group_std_mean": 0.22393441945314407, "eval_signal/mean_confidence_reward/group_zero_std_frac": 0.0, "eval_signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 1.8724499189678074e-06, "eval_signal/mean_confidence_reward/weight": 9.999999747378752e-06, "eval_signal/mean_confidence_reward/weighted_centered_abs_mean": 1.8724499189678074e-06, "eval_steps_per_second": 0.031, "step": 600 }, { "epoch": 1.4423076923076923, "step": 600, "train_probe_calibration/aurc": 0.0811248529110322, "train_probe_calibration/batch_distribution_entropy": 0.6947460190964702, "train_probe_calibration/batch_entropy_100bins": 0.37999471276504704, "train_probe_calibration/batch_entropy_10bins": 0.6947460190964702, "train_probe_calibration/batch_entropy_50bins": 0.44732362760407207, "train_probe_calibration/batch_uniqueness": 0.5262528608423931, "train_probe_calibration/confidence_entropy": 0.4823398318438881, "train_probe_calibration/coverage@0%": 0.0, "train_probe_calibration/coverage@1%": 0.0, "train_probe_calibration/coverage@10%": 0.7523975588491717, "train_probe_calibration/coverage@15%": 0.8116826503923278, "train_probe_calibration/coverage@20%": 0.8962510897994769, "train_probe_calibration/coverage@25%": 1.0, "train_probe_calibration/coverage@30%": 1.0, "train_probe_calibration/coverage@5%": 0.4219703574542284, "train_probe_calibration/distribution_entropy_10": 0.6947460190964702, "train_probe_calibration/distribution_entropy_100": 0.37999471276504704, "train_probe_calibration/ece": 0.07541412380122044, "train_probe_calibration/mean_confidence": 0.7123801220575415, "train_probe_calibration/unique_confidence_per_question": 0.0078125, "train_probe_calibration/unique_confidences": 9, "train_probe_completions/clipped_ratio": 0.005902777777777775, "train_probe_completions/max_length": 3296.6666666666665, "train_probe_completions/max_terminated_length": 3296.6666666666665, "train_probe_completions/mean_length": 1121.4295450846355, "train_probe_completions/mean_terminated_length": 1128.2926839192708, "train_probe_completions/min_length": 188.33333333333334, "train_probe_completions/min_terminated_length": 276.6666666666667, "train_probe_loss": 0.0, "train_probe_num_tokens": 1533203047.0, "train_probe_reward": 1.3208882013956706, "train_probe_reward_std": 0.2697082112232844, "train_probe_rewards/accuracy_reward": 0.7690972288449606, "train_probe_rewards/brier_reward": 0.8770051995913187, "train_probe_rewards/confidence_one_or_zero": 0.0, "train_probe_rewards/format_reward": 0.995659718910853, "train_probe_rewards/mean_confidence_reward": 0.7092882096767426, "train_probe_runtime": 190.0224, "train_probe_samples_per_second": 5.263, "train_probe_signal/accuracy_reward/centered_abs_mean": 0.3442925363779068, "train_probe_signal/accuracy_reward/group_std_mean": 0.4188603013753891, "train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0, "train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1721462681889534, "train_probe_signal/accuracy_reward/weight": 0.5, "train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.1721462681889534, "train_probe_signal/advantage_abs_mean": 0.2152716045578321, "train_probe_signal/advantage_pre_scale_abs_mean": 0.2152716045578321, "train_probe_signal/advantage_pre_scale_std": 0.2695246562361717, "train_probe_signal/advantage_std": 0.2695246562361717, "train_probe_signal/brier_reward/centered_abs_mean": 0.12592938169836998, "train_probe_signal/brier_reward/group_std_mean": 0.1717534065246582, "train_probe_signal/brier_reward/group_zero_std_frac": 0.0, "train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.06296469084918499, "train_probe_signal/brier_reward/weight": 0.5, "train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.06296469084918499, "train_probe_signal/confidence_one_or_zero/centered_abs_mean": 0.0, "train_probe_signal/confidence_one_or_zero/group_std_mean": 0.0, "train_probe_signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "train_probe_signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "train_probe_signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "train_probe_signal/format_reward/centered_abs_mean": 0.00830078125, "train_probe_signal/format_reward/group_std_mean": 0.021562909707427025, "train_probe_signal/format_reward/group_zero_std_frac": 0.8888888955116272, "train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.004150390625, "train_probe_signal/format_reward/weight": 0.5, "train_probe_signal/format_reward/weighted_centered_abs_mean": 0.004150390625, "train_probe_signal/mean_confidence_reward/centered_abs_mean": 0.18776040027538934, "train_probe_signal/mean_confidence_reward/group_std_mean": 0.22326893856128058, "train_probe_signal/mean_confidence_reward/group_zero_std_frac": 0.0, "train_probe_signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 1.8776040064949484e-06, "train_probe_signal/mean_confidence_reward/weight": 9.999999747378752e-06, "train_probe_signal/mean_confidence_reward/weighted_centered_abs_mean": 1.8776040064949484e-06, "train_probe_steps_per_second": 0.032 }, { "calibration/aurc": 0.1625902676601865, "calibration/batch_distribution_entropy": 0.688178501134466, "calibration/batch_entropy_100bins": 0.3773364125369895, "calibration/batch_entropy_10bins": 0.688178501134466, "calibration/batch_entropy_50bins": 0.44419431958654004, "calibration/batch_uniqueness": 0.5459930237340862, "calibration/confidence_entropy": 0.4974271541450772, "calibration/coverage@0%": 0.08655913978494624, "calibration/coverage@1%": 0.1685434739886016, "calibration/coverage@10%": 0.328105061226429, "calibration/coverage@15%": 0.4577343120676735, "calibration/coverage@20%": 0.7047311792905762, "calibration/coverage@25%": 0.8369028378426489, "calibration/coverage@30%": 0.952465335711409, "calibration/coverage@5%": 0.23120665936719167, "calibration/distribution_entropy_10": 0.688178501134466, "calibration/distribution_entropy_100": 0.3773364125369895, "calibration/ece": 0.1463983525444193, "calibration/mean_confidence": 0.6849986304418707, "calibration/unique_confidence_per_question": 0.021875, "calibration/unique_confidences": 8.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008854166666666653, "completions/max_length": 3869.8, "completions/max_terminated_length": 3869.8, "completions/mean_length": 1103.256103515625, "completions/mean_terminated_length": 1113.0990234375, "completions/min_length": 0.0, "completions/min_terminated_length": 276.2, "epoch": 1.4543269230769231, "grad_norm": 0.00044460591743700206, "learning_rate": 2.6141826923076926e-06, "loss": -0.0097, "num_tokens": 1548997005.0, "reward": 1.2924524068832397, "reward_std": 0.10913865864276887, "rewards/accuracy_reward": 0.745312488079071, "rewards/brier_reward": 0.8484323143959045, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9911458253860473, "rewards/mean_confidence_reward": 0.7084201455116272, "sampling/batch_mean_priority_error": 0.03887890530603154, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.26666666666666666, "sampling/error_ema_max": 0.23887455463409424, "sampling/error_ema_mean": 0.009661880135536195, "sampling/priority_kl": 0.030000338703393935, "sampling/priority_scale": 0.7392990172142163, "sampling/prob_entropy": 10.27895565032959, "sampling/prob_max": 5.1556043763412164e-05, "sampling/prob_min": 1.9883858840330504e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.4471999883651734, "sampling/prompt_draws_total": 43416.0, "sampling/seen_fraction": 0.8442333340644836, "sampling/unseen_fraction": 0.15576666593551636, "signal/accuracy_reward/centered_abs_mean": 0.10305989682674407, "signal/accuracy_reward/group_std_mean": 0.14260239899158478, "signal/accuracy_reward/group_zero_std_frac": 0.5666666865348816, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05152994841337204, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05152994841337204, "signal/advantage_abs_mean": 0.075344218313694, "signal/advantage_pre_scale_abs_mean": 0.075344218313694, "signal/advantage_pre_scale_std": 0.16476980745792388, "signal/advantage_std": 0.16476980745792388, "signal/brier_reward/centered_abs_mean": 0.06039917916059494, "signal/brier_reward/group_std_mean": 0.08526878505945205, "signal/brier_reward/group_zero_std_frac": 0.20833333432674409, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03019958958029747, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03019958958029747, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.014333767257630825, "signal/format_reward/group_std_mean": 0.02507205940783024, "signal/format_reward/group_zero_std_frac": 0.9027777910232544, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.007166883628815413, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.007166883628815413, "signal/mean_confidence_reward/centered_abs_mean": 0.05270670726895332, "signal/mean_confidence_reward/group_std_mean": 0.07211160734295845, "signal/mean_confidence_reward/group_zero_std_frac": 0.21388888955116273, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.270670556001277e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.270670556001277e-07, "step": 605 }, { "calibration/aurc": 0.14458147639177027, "calibration/batch_distribution_entropy": 0.6089697186869654, "calibration/batch_entropy_100bins": 0.3300116115365298, "calibration/batch_entropy_10bins": 0.6089697186869654, "calibration/batch_entropy_50bins": 0.38848432955766393, "calibration/batch_uniqueness": 0.4002591418256035, "calibration/confidence_entropy": 0.47229101300975074, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.19109662073490813, "calibration/coverage@10%": 0.4357939632545932, "calibration/coverage@15%": 0.5927083333333334, "calibration/coverage@20%": 0.703125, "calibration/coverage@25%": 0.8250909391534391, "calibration/coverage@30%": 0.8649801587301587, "calibration/coverage@5%": 0.32838336614173225, "calibration/distribution_entropy_10": 0.6089697186869654, "calibration/distribution_entropy_100": 0.3300116115365298, "calibration/ece": 0.15325526366495854, "calibration/mean_confidence": 0.7555445738553516, "calibration/unique_confidence_per_question": 0.019791666666666666, "calibration/unique_confidences": 7.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.003211805555555558, "completions/max_length": 3788.4, "completions/max_terminated_length": 3788.4, "completions/mean_length": 1117.24462890625, "completions/mean_terminated_length": 1120.8237548828124, "completions/min_length": 0.0, "completions/min_terminated_length": 310.0, "epoch": 1.4663461538461537, "grad_norm": 0.00047830064431764185, "learning_rate": 2.584134615384616e-06, "loss": -0.0029, "num_tokens": 1564966735.0, "reward": 1.2964518308639525, "reward_std": 0.10213772356510162, "rewards/accuracy_reward": 0.7368055582046509, "rewards/brier_reward": 0.8592951416969299, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9967882037162781, "rewards/mean_confidence_reward": 0.735937488079071, "sampling/batch_mean_priority_error": 0.03690880815930133, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.2722222222222222, "sampling/error_ema_max": 0.23887455463409424, "sampling/error_ema_mean": 0.009698636457324028, "sampling/priority_kl": 0.02999996729195118, "sampling/priority_scale": 0.7415312468772755, "sampling/prob_entropy": 10.278956413269043, "sampling/prob_max": 5.177144994377159e-05, "sampling/prob_min": 1.990957753150724e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.4591999769210815, "sampling/prompt_draws_total": 43776.0, "sampling/seen_fraction": 0.8475733399391174, "sampling/unseen_fraction": 0.15242666006088257, "signal/accuracy_reward/centered_abs_mean": 0.09847005158662796, "signal/accuracy_reward/group_std_mean": 0.13745512515306474, "signal/accuracy_reward/group_zero_std_frac": 0.5805555701255798, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04923502579331398, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04923502579331398, "signal/advantage_abs_mean": 0.07168345451354981, "signal/advantage_pre_scale_abs_mean": 0.07168345451354981, "signal/advantage_pre_scale_std": 0.1520264506340027, "signal/advantage_std": 0.1520264506340027, "signal/brier_reward/centered_abs_mean": 0.05799070745706558, "signal/brier_reward/group_std_mean": 0.08116589337587357, "signal/brier_reward/group_zero_std_frac": 0.20833333432674409, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02899535372853279, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.02899535372853279, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.005799696128815412, "signal/format_reward/group_std_mean": 0.012281404808163643, "signal/format_reward/group_zero_std_frac": 0.9444444417953491, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.002899848064407706, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.002899848064407706, "signal/mean_confidence_reward/centered_abs_mean": 0.048818358778953554, "signal/mean_confidence_reward/group_std_mean": 0.0655674695968628, "signal/mean_confidence_reward/group_zero_std_frac": 0.22500000298023223, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 4.881835764081189e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 4.881835764081189e-07, "step": 610 }, { "calibration/aurc": 0.10717646892595933, "calibration/batch_distribution_entropy": 0.5944995185996648, "calibration/batch_entropy_100bins": 0.3223670980698781, "calibration/batch_entropy_10bins": 0.5944995185996648, "calibration/batch_entropy_50bins": 0.3794853319919132, "calibration/batch_uniqueness": 0.3876516859568565, "calibration/confidence_entropy": 0.47987866269041757, "calibration/coverage@0%": 0.27700645890043146, "calibration/coverage@1%": 0.4954637668277688, "calibration/coverage@10%": 0.6712006962576152, "calibration/coverage@15%": 0.7573625761531767, "calibration/coverage@20%": 0.7733625761531766, "calibration/coverage@25%": 0.8140936466492603, "calibration/coverage@30%": 0.8203599651871192, "calibration/coverage@5%": 0.5960749921300668, "calibration/distribution_entropy_10": 0.5944995185996648, "calibration/distribution_entropy_100": 0.3223670980698781, "calibration/ece": 0.13857618576745728, "calibration/mean_confidence": 0.7310599092643002, "calibration/unique_confidence_per_question": 0.0203125, "calibration/unique_confidences": 7.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00659722222222221, "completions/max_length": 3889.6, "completions/max_terminated_length": 3889.6, "completions/mean_length": 1076.1330932617188, "completions/mean_terminated_length": 1083.3836547851563, "completions/min_length": 0.0, "completions/min_terminated_length": 246.2, "epoch": 1.4783653846153846, "grad_norm": 0.0006141048506833613, "learning_rate": 2.554086538461539e-06, "loss": -0.0066, "num_tokens": 1580488364.0, "reward": 1.314866876602173, "reward_std": 0.10772739797830581, "rewards/accuracy_reward": 0.7642361044883728, "rewards/brier_reward": 0.8720798373222352, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9934027910232544, "rewards/mean_confidence_reward": 0.7517708301544189, "sampling/batch_mean_priority_error": 0.03643278677661457, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.2361111111111111, "sampling/error_ema_max": 0.23887455463409424, "sampling/error_ema_mean": 0.009736244194209575, "sampling/priority_kl": 0.03000003136694431, "sampling/priority_scale": 0.7432866037590429, "sampling/prob_entropy": 10.278955078125, "sampling/prob_max": 5.196688580326736e-05, "sampling/prob_min": 1.9939581397920848e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.4711999893188477, "sampling/prompt_draws_total": 44136.0, "sampling/seen_fraction": 0.8505399942398071, "sampling/unseen_fraction": 0.14946000576019286, "signal/accuracy_reward/centered_abs_mean": 0.10202908217906952, "signal/accuracy_reward/group_std_mean": 0.14247817397117615, "signal/accuracy_reward/group_zero_std_frac": 0.5638888835906982, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05101454108953476, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05101454108953476, "signal/advantage_abs_mean": 0.07561420053243637, "signal/advantage_pre_scale_abs_mean": 0.07561420053243637, "signal/advantage_pre_scale_std": 0.1636774629354477, "signal/advantage_std": 0.1636774629354477, "signal/brier_reward/centered_abs_mean": 0.056111641228199005, "signal/brier_reward/group_std_mean": 0.07825350314378739, "signal/brier_reward/group_zero_std_frac": 0.26666667461395266, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.028055820614099503, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.028055820614099503, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.011002604011446237, "signal/format_reward/group_std_mean": 0.019482703506946565, "signal/format_reward/group_zero_std_frac": 0.9222222447395325, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.005501302005723119, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.005501302005723119, "signal/mean_confidence_reward/centered_abs_mean": 0.04708550795912743, "signal/mean_confidence_reward/group_std_mean": 0.063015665858984, "signal/mean_confidence_reward/group_zero_std_frac": 0.2750000059604645, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 4.7085508754207693e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 4.7085508754207693e-07, "step": 615 }, { "calibration/aurc": 0.08026650067991369, "calibration/batch_distribution_entropy": 0.6396196020074065, "calibration/batch_entropy_100bins": 0.35237163811846184, "calibration/batch_entropy_10bins": 0.6396196020074065, "calibration/batch_entropy_50bins": 0.41480619106771527, "calibration/batch_uniqueness": 0.4768779100385724, "calibration/confidence_entropy": 0.5046711555324517, "calibration/coverage@0%": 0.23890747389033945, "calibration/coverage@1%": 0.4128658072236727, "calibration/coverage@10%": 0.6864931591311476, "calibration/coverage@15%": 0.803647817535496, "calibration/coverage@20%": 0.8913060083220238, "calibration/coverage@25%": 0.9221932114882506, "calibration/coverage@30%": 0.9221932114882506, "calibration/coverage@5%": 0.5839131007001614, "calibration/distribution_entropy_10": 0.6396196020074065, "calibration/distribution_entropy_100": 0.35237163811846184, "calibration/ece": 0.14547514358016939, "calibration/mean_confidence": 0.6918397988901672, "calibration/unique_confidence_per_question": 0.0203125, "calibration/unique_confidences": 7.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00590277777777779, "completions/max_length": 3942.4, "completions/max_terminated_length": 3942.4, "completions/mean_length": 1063.0981201171876, "completions/mean_terminated_length": 1069.5595336914062, "completions/min_length": 0.0, "completions/min_terminated_length": 269.0, "epoch": 1.4903846153846154, "grad_norm": 0.0004827018128708005, "learning_rate": 2.5240384615384618e-06, "loss": -0.007, "num_tokens": 1595827062.0, "reward": 1.3161508083343505, "reward_std": 0.09967431724071503, "rewards/accuracy_reward": 0.7645833253860473, "rewards/brier_reward": 0.8736067652702332, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9940972208976746, "rewards/mean_confidence_reward": 0.7170052051544189, "sampling/batch_mean_priority_error": 0.03028266689573435, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.19166666666666668, "sampling/error_ema_max": 0.23887455463409424, "sampling/error_ema_mean": 0.009760693646967411, "sampling/priority_kl": 0.02999991215765476, "sampling/priority_scale": 0.7443806945579127, "sampling/prob_entropy": 10.278950119018555, "sampling/prob_max": 5.213724070927128e-05, "sampling/prob_min": 1.9976762996520846e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.4832000017166138, "sampling/prompt_draws_total": 44496.0, "sampling/seen_fraction": 0.8530266761779786, "sampling/unseen_fraction": 0.1469733238220215, "signal/accuracy_reward/centered_abs_mean": 0.09691840261220933, "signal/accuracy_reward/group_std_mean": 0.1350296199321747, "signal/accuracy_reward/group_zero_std_frac": 0.5861111164093018, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04845920130610466, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04845920130610466, "signal/advantage_abs_mean": 0.06861151233315468, "signal/advantage_pre_scale_abs_mean": 0.06861151233315468, "signal/advantage_pre_scale_std": 0.15253279954195023, "signal/advantage_std": 0.15253279954195023, "signal/brier_reward/centered_abs_mean": 0.054378128796815875, "signal/brier_reward/group_std_mean": 0.07744768559932709, "signal/brier_reward/group_zero_std_frac": 0.20555555820465088, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.027189064398407937, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.027189064398407937, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.010091146035119892, "signal/format_reward/group_std_mean": 0.01992493961006403, "signal/format_reward/group_zero_std_frac": 0.9138888955116272, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.005045573017559946, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.005045573017559946, "signal/mean_confidence_reward/centered_abs_mean": 0.049541018158197406, "signal/mean_confidence_reward/group_std_mean": 0.0657179981470108, "signal/mean_confidence_reward/group_zero_std_frac": 0.225, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 4.954101825660473e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 4.954101825660473e-07, "step": 620 }, { "calibration/aurc": 0.1692123429059063, "calibration/batch_distribution_entropy": 0.7073127475286846, "calibration/batch_entropy_100bins": 0.3959757774730325, "calibration/batch_entropy_10bins": 0.7073127475286846, "calibration/batch_entropy_50bins": 0.4661362784068519, "calibration/batch_uniqueness": 0.6192726904672575, "calibration/confidence_entropy": 0.5314451492589153, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.10704960835509139, "calibration/coverage@10%": 0.19200119669277632, "calibration/coverage@15%": 0.34942235081495865, "calibration/coverage@20%": 0.651106819781827, "calibration/coverage@25%": 0.861975514219057, "calibration/coverage@30%": 0.9502932210572362, "calibration/coverage@5%": 0.17059127502175805, "calibration/distribution_entropy_10": 0.7073127475286846, "calibration/distribution_entropy_100": 0.3959757774730325, "calibration/ece": 0.09932219899275038, "calibration/mean_confidence": 0.6648237434156267, "calibration/unique_confidence_per_question": 0.021354166666666667, "calibration/unique_confidences": 8.2, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00425347222222221, "completions/max_length": 3901.8, "completions/max_terminated_length": 3901.8, "completions/mean_length": 1117.1639892578125, "completions/mean_terminated_length": 1121.9173828125, "completions/min_length": 0.0, "completions/min_terminated_length": 303.2, "epoch": 1.5024038461538463, "grad_norm": 0.0005235833232291043, "learning_rate": 2.4939903846153847e-06, "loss": -0.0035, "num_tokens": 1611828215.0, "reward": 1.3013794660568236, "reward_std": 0.09948191493749618, "rewards/accuracy_reward": 0.74296875, "rewards/brier_reward": 0.8642031192779541, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9955729246139526, "rewards/mean_confidence_reward": 0.7044878482818604, "sampling/batch_mean_priority_error": 0.031117468382855173, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.24722222222222223, "sampling/error_ema_max": 0.23887455463409424, "sampling/error_ema_mean": 0.009788672253489495, "sampling/priority_kl": 0.029999787732958794, "sampling/priority_scale": 0.7461758435005322, "sampling/prob_entropy": 10.278951644897461, "sampling/prob_max": 5.233678166405298e-05, "sampling/prob_min": 2.0006873819511382e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.495199990272522, "sampling/prompt_draws_total": 44856.0, "sampling/seen_fraction": 0.8558666706085205, "sampling/unseen_fraction": 0.1441333293914795, "signal/accuracy_reward/centered_abs_mean": 0.0996365025639534, "signal/accuracy_reward/group_std_mean": 0.14100307822227479, "signal/accuracy_reward/group_zero_std_frac": 0.5611111164093018, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0498182512819767, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0498182512819767, "signal/advantage_abs_mean": 0.06826261132955551, "signal/advantage_pre_scale_abs_mean": 0.06826261132955551, "signal/advantage_pre_scale_std": 0.14662763476371765, "signal/advantage_std": 0.14662763476371765, "signal/brier_reward/centered_abs_mean": 0.055239399522542955, "signal/brier_reward/group_std_mean": 0.07736771106719971, "signal/brier_reward/group_zero_std_frac": 0.23611111044883729, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.027619699761271477, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.027619699761271477, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.007948133535683155, "signal/format_reward/group_std_mean": 0.01642947643995285, "signal/format_reward/group_zero_std_frac": 0.9277778029441833, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.003974066767841578, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.003974066767841578, "signal/mean_confidence_reward/centered_abs_mean": 0.04968967735767364, "signal/mean_confidence_reward/group_std_mean": 0.06586038321256638, "signal/mean_confidence_reward/group_zero_std_frac": 0.24166666567325593, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 4.968967687091208e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 4.968967687091208e-07, "step": 625 }, { "calibration/aurc": 0.07472276214057484, "calibration/batch_distribution_entropy": 0.6541581603664168, "calibration/batch_entropy_100bins": 0.3582094950133639, "calibration/batch_entropy_10bins": 0.6541581603664168, "calibration/batch_entropy_50bins": 0.42167842174866077, "calibration/batch_uniqueness": 0.45456095770444616, "calibration/confidence_entropy": 0.4816627977491092, "calibration/coverage@0%": 0.20175853018372703, "calibration/coverage@1%": 0.3404983955128461, "calibration/coverage@10%": 0.6757976022952985, "calibration/coverage@15%": 0.7468123990545482, "calibration/coverage@20%": 0.8864184404933099, "calibration/coverage@25%": 0.9407666134167011, "calibration/coverage@30%": 0.9884210526315791, "calibration/coverage@5%": 0.54803349468375, "calibration/distribution_entropy_10": 0.6541581603664168, "calibration/distribution_entropy_100": 0.3582094950133639, "calibration/ece": 0.1292614681630037, "calibration/mean_confidence": 0.70898345643133, "calibration/unique_confidence_per_question": 0.020833333333333332, "calibration/unique_confidences": 8.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0019097222222222321, "completions/max_length": 3805.6, "completions/max_terminated_length": 3805.6, "completions/mean_length": 1087.827685546875, "completions/mean_terminated_length": 1089.9354614257813, "completions/min_length": 43.6, "completions/min_terminated_length": 284.4, "epoch": 1.5144230769230769, "grad_norm": 0.0004652974021155387, "learning_rate": 2.463942307692308e-06, "loss": -0.0006, "num_tokens": 1627494742.0, "reward": 1.3149696588516235, "reward_std": 0.08956841826438904, "rewards/accuracy_reward": 0.75234375, "rewards/brier_reward": 0.8796649217605591, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9979166626930237, "rewards/mean_confidence_reward": 0.6975868105888366, "sampling/batch_mean_priority_error": 0.025660975930045727, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.2333333333333333, "sampling/error_ema_max": 0.23887455463409424, "sampling/error_ema_mean": 0.009810564666986465, "sampling/priority_kl": 0.02999999262392521, "sampling/priority_scale": 0.7481342852348462, "sampling/prob_entropy": 10.27895278930664, "sampling/prob_max": 5.254092757240869e-05, "sampling/prob_min": 2.0034283807035536e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.507200002670288, "sampling/prompt_draws_total": 45216.0, "sampling/seen_fraction": 0.8587466716766358, "sampling/unseen_fraction": 0.14125332832336426, "signal/accuracy_reward/centered_abs_mean": 0.099853515625, "signal/accuracy_reward/group_std_mean": 0.13629501461982726, "signal/accuracy_reward/group_zero_std_frac": 0.5916666746139526, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0499267578125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0499267578125, "signal/advantage_abs_mean": 0.0632256880402565, "signal/advantage_pre_scale_abs_mean": 0.0632256880402565, "signal/advantage_pre_scale_std": 0.13528273105621338, "signal/advantage_std": 0.13528273105621338, "signal/brier_reward/centered_abs_mean": 0.05339196249842644, "signal/brier_reward/group_std_mean": 0.07426847070455551, "signal/brier_reward/group_zero_std_frac": 0.22222222685813903, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02669598124921322, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.02669598124921322, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.0038845486007630823, "signal/format_reward/group_std_mean": 0.00885537788271904, "signal/format_reward/group_zero_std_frac": 0.9583333253860473, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0019422743003815412, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0019422743003815412, "signal/mean_confidence_reward/centered_abs_mean": 0.05010091587901115, "signal/mean_confidence_reward/group_std_mean": 0.06616592630743981, "signal/mean_confidence_reward/group_zero_std_frac": 0.23611111640930177, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.010091399526573e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.010091399526573e-07, "step": 630 }, { "calibration/aurc": 0.0869278000999156, "calibration/batch_distribution_entropy": 0.6093055797850038, "calibration/batch_entropy_100bins": 0.34342417809934345, "calibration/batch_entropy_10bins": 0.6093055797850038, "calibration/batch_entropy_50bins": 0.4042733859018993, "calibration/batch_uniqueness": 0.37829591462204915, "calibration/confidence_entropy": 0.46445287192138196, "calibration/coverage@0%": 0.17471310903252826, "calibration/coverage@1%": 0.17471310903252826, "calibration/coverage@10%": 0.6890747385742918, "calibration/coverage@15%": 0.8484273941592011, "calibration/coverage@20%": 0.8900041805536268, "calibration/coverage@25%": 0.9237453276165347, "calibration/coverage@30%": 0.9583333333333333, "calibration/coverage@5%": 0.4357315831097729, "calibration/distribution_entropy_10": 0.6093055797850038, "calibration/distribution_entropy_100": 0.34342417809934345, "calibration/ece": 0.136922299089084, "calibration/mean_confidence": 0.6978689054156199, "calibration/unique_confidence_per_question": 0.02239583333333333, "calibration/unique_confidences": 8.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.005381944444444442, "completions/max_length": 3956.2, "completions/max_terminated_length": 3956.2, "completions/mean_length": 1087.4025390625, "completions/mean_terminated_length": 1093.3152587890625, "completions/min_length": 0.0, "completions/min_terminated_length": 282.6, "epoch": 1.5264423076923077, "grad_norm": 0.0005978142144158483, "learning_rate": 2.433894230769231e-06, "loss": -0.0044, "num_tokens": 1643114099.0, "reward": 1.3020598888397217, "reward_std": 0.10506750345230102, "rewards/accuracy_reward": 0.7394965171813965, "rewards/brier_reward": 0.8700781345367432, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.994531261920929, "rewards/mean_confidence_reward": 0.692760419845581, "sampling/batch_mean_priority_error": 0.035493932026362335, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.2416666666666667, "sampling/error_ema_max": 0.23887455463409424, "sampling/error_ema_mean": 0.009836449101567269, "sampling/priority_kl": 0.030000242963433267, "sampling/priority_scale": 0.7501380623085424, "sampling/prob_entropy": 10.278949165344239, "sampling/prob_max": 5.274732830002904e-05, "sampling/prob_min": 2.006111608352512e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.5192000150680542, "sampling/prompt_draws_total": 45576.0, "sampling/seen_fraction": 0.8615866661071777, "sampling/unseen_fraction": 0.13841333389282226, "signal/accuracy_reward/centered_abs_mean": 0.10477973222732544, "signal/accuracy_reward/group_std_mean": 0.14873329997062684, "signal/accuracy_reward/group_zero_std_frac": 0.5388888895511628, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05238986611366272, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05238986611366272, "signal/advantage_abs_mean": 0.07023787051439286, "signal/advantage_pre_scale_abs_mean": 0.07023787051439286, "signal/advantage_pre_scale_std": 0.15317056477069854, "signal/advantage_std": 0.15317056477069854, "signal/brier_reward/centered_abs_mean": 0.05945446789264679, "signal/brier_reward/group_std_mean": 0.08587936162948609, "signal/brier_reward/group_zero_std_frac": 0.275, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.029727233946323393, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.029727233946323393, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.009977213479578495, "signal/format_reward/group_std_mean": 0.02117712702602148, "signal/format_reward/group_zero_std_frac": 0.9055555701255799, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.004988606739789248, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.004988606739789248, "signal/mean_confidence_reward/centered_abs_mean": 0.0510736845433712, "signal/mean_confidence_reward/group_std_mean": 0.06932668089866638, "signal/mean_confidence_reward/group_zero_std_frac": 0.3083333373069763, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.107368110657262e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.107368110657262e-07, "step": 635 }, { "calibration/aurc": 0.16204915459909533, "calibration/batch_distribution_entropy": 0.5274177386820045, "calibration/batch_entropy_100bins": 0.2888359937240352, "calibration/batch_entropy_10bins": 0.5274177386820045, "calibration/batch_entropy_50bins": 0.3400130584847098, "calibration/batch_uniqueness": 0.1655676973886686, "calibration/confidence_entropy": 0.42929724407108766, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.29937611653153773, "calibration/coverage@15%": 0.3413906829737529, "calibration/coverage@20%": 0.6711885039790432, "calibration/coverage@25%": 0.8699130660510619, "calibration/coverage@30%": 0.9429319371727749, "calibration/coverage@5%": 0.1368421052631579, "calibration/distribution_entropy_10": 0.5274177386820045, "calibration/distribution_entropy_100": 0.2888359937240352, "calibration/ece": 0.09777153337267655, "calibration/mean_confidence": 0.752498526092933, "calibration/unique_confidence_per_question": 0.021875, "calibration/unique_confidences": 8.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006857638888888884, "completions/max_length": 3726.6, "completions/max_terminated_length": 3726.6, "completions/mean_length": 1074.8276123046876, "completions/mean_terminated_length": 1082.3476684570312, "completions/min_length": 0.0, "completions/min_terminated_length": 275.2, "epoch": 1.5384615384615383, "grad_norm": 0.0006733312038704753, "learning_rate": 2.403846153846154e-06, "loss": -0.0059, "num_tokens": 1658593169.0, "reward": 1.3056791067123412, "reward_std": 0.10761468857526779, "rewards/accuracy_reward": 0.7495659708976745, "rewards/brier_reward": 0.8687222003936768, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9930555701255799, "rewards/mean_confidence_reward": 0.7232986450195312, "sampling/batch_mean_priority_error": 0.03791013955058041, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.23055555555555554, "sampling/error_ema_max": 0.23887455463409424, "sampling/error_ema_mean": 0.009872362203896045, "sampling/priority_kl": 0.03000005818903446, "sampling/priority_scale": 0.7523871720070019, "sampling/prob_entropy": 10.278955078125, "sampling/prob_max": 5.2965757640777154e-05, "sampling/prob_min": 2.008588744502049e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.5312000036239624, "sampling/prompt_draws_total": 45936.0, "sampling/seen_fraction": 0.8645066618919373, "sampling/unseen_fraction": 0.13549333810806274, "signal/accuracy_reward/centered_abs_mean": 0.1015896275639534, "signal/accuracy_reward/group_std_mean": 0.14180152118206024, "signal/accuracy_reward/group_zero_std_frac": 0.5666666865348816, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0507948137819767, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0507948137819767, "signal/advantage_abs_mean": 0.0746236190199852, "signal/advantage_pre_scale_abs_mean": 0.0746236190199852, "signal/advantage_pre_scale_std": 0.16241733729839325, "signal/advantage_std": 0.16241733729839325, "signal/brier_reward/centered_abs_mean": 0.06814387738704682, "signal/brier_reward/group_std_mean": 0.09454997181892395, "signal/brier_reward/group_zero_std_frac": 0.3111111134290695, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03407193869352341, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03407193869352341, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.010449218843132258, "signal/format_reward/group_std_mean": 0.019220804050564765, "signal/format_reward/group_zero_std_frac": 0.9222222447395325, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.005224609421566129, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.005224609421566129, "signal/mean_confidence_reward/centered_abs_mean": 0.05597711279988289, "signal/mean_confidence_reward/group_std_mean": 0.07454514056444168, "signal/mean_confidence_reward/group_zero_std_frac": 0.3444444477558136, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.597710924121202e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.597710924121202e-07, "step": 640 }, { "calibration/aurc": 0.14557671748218712, "calibration/batch_distribution_entropy": 0.48910598486412377, "calibration/batch_entropy_100bins": 0.2652488436322299, "calibration/batch_entropy_10bins": 0.48910598486412377, "calibration/batch_entropy_50bins": 0.31224664703352756, "calibration/batch_uniqueness": 0.052291835380526994, "calibration/confidence_entropy": 0.42219030296825294, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.36709317585301837, "calibration/coverage@15%": 0.6915279590948651, "calibration/coverage@20%": 0.7566579634464752, "calibration/coverage@25%": 0.7796344647519582, "calibration/coverage@30%": 0.9255072345517842, "calibration/coverage@5%": 0.16614583333333333, "calibration/distribution_entropy_10": 0.48910598486412377, "calibration/distribution_entropy_100": 0.2652488436322299, "calibration/ece": 0.09933202463799398, "calibration/mean_confidence": 0.7654084570715379, "calibration/unique_confidence_per_question": 0.021875, "calibration/unique_confidences": 8.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008506944444444442, "completions/max_length": 3880.8, "completions/max_terminated_length": 3880.8, "completions/mean_length": 1150.5470458984375, "completions/mean_terminated_length": 1160.5489013671875, "completions/min_length": 0.0, "completions/min_terminated_length": 293.2, "epoch": 1.5504807692307692, "grad_norm": 0.0005169560317881405, "learning_rate": 2.373798076923077e-06, "loss": -0.0113, "num_tokens": 1674952815.0, "reward": 1.288080620765686, "reward_std": 0.12121982872486115, "rewards/accuracy_reward": 0.7271701455116272, "rewards/brier_reward": 0.8579175591468811, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9910590291023255, "rewards/mean_confidence_reward": 0.7332204699516296, "sampling/batch_mean_priority_error": 0.04735698801884604, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.21666666666666665, "sampling/error_ema_max": 0.23887455463409424, "sampling/error_ema_mean": 0.009915192052721978, "sampling/priority_kl": 0.029999776929616927, "sampling/priority_scale": 0.7540454684989527, "sampling/prob_entropy": 10.278952598571777, "sampling/prob_max": 5.3160951938480136e-05, "sampling/prob_min": 2.0116871382924728e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.5432000160217285, "sampling/prompt_draws_total": 46296.0, "sampling/seen_fraction": 0.8670066714286804, "sampling/unseen_fraction": 0.13299332857131957, "signal/accuracy_reward/centered_abs_mean": 0.10897894948720932, "signal/accuracy_reward/group_std_mean": 0.1503289371728897, "signal/accuracy_reward/group_zero_std_frac": 0.5472222447395325, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05448947474360466, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05448947474360466, "signal/advantage_abs_mean": 0.08551755398511887, "signal/advantage_pre_scale_abs_mean": 0.08551755398511887, "signal/advantage_pre_scale_std": 0.18074164986610414, "signal/advantage_std": 0.18074164986610414, "signal/brier_reward/centered_abs_mean": 0.07495963871479035, "signal/brier_reward/group_std_mean": 0.1039071187376976, "signal/brier_reward/group_zero_std_frac": 0.3388888955116272, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.037479819357395174, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.037479819357395174, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.014284939505159855, "signal/format_reward/group_std_mean": 0.025171923637390136, "signal/format_reward/group_zero_std_frac": 0.9000000119209289, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0071424697525799274, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0071424697525799274, "signal/mean_confidence_reward/centered_abs_mean": 0.05712837353348732, "signal/mean_confidence_reward/group_std_mean": 0.0764322578907013, "signal/mean_confidence_reward/group_zero_std_frac": 0.3777777850627899, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.712837264582049e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.712837264582049e-07, "step": 645 }, { "calibration/aurc": 0.14833079176694414, "calibration/batch_distribution_entropy": 0.3771737415299348, "calibration/batch_entropy_100bins": 0.20520349746532088, "calibration/batch_entropy_10bins": 0.3771737415299348, "calibration/batch_entropy_50bins": 0.24156223705140362, "calibration/batch_uniqueness": -0.19533839640881734, "calibration/confidence_entropy": 0.3984146293875352, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.30994764397905755, "calibration/coverage@15%": 0.5146911268163985, "calibration/coverage@20%": 0.7292323419304039, "calibration/coverage@25%": 0.9198952879581151, "calibration/coverage@30%": 1.0, "calibration/coverage@5%": 0.1863874345549738, "calibration/distribution_entropy_10": 0.3771737415299348, "calibration/distribution_entropy_100": 0.20520349746532088, "calibration/ece": 0.10669382387807795, "calibration/mean_confidence": 0.8033982003309867, "calibration/unique_confidence_per_question": 0.019791666666666662, "calibration/unique_confidences": 7.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.007638888888888862, "completions/max_length": 3855.8, "completions/max_terminated_length": 3855.8, "completions/mean_length": 1063.4618896484376, "completions/mean_terminated_length": 1071.8775146484375, "completions/min_length": 0.0, "completions/min_terminated_length": 302.8, "epoch": 1.5625, "grad_norm": 0.0005790924187749624, "learning_rate": 2.3437500000000002e-06, "loss": -0.0066, "num_tokens": 1690267352.0, "reward": 1.3241550207138062, "reward_std": 0.11605097949504853, "rewards/accuracy_reward": 0.7796875, "rewards/brier_reward": 0.8764192819595337, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9921875119209289, "rewards/mean_confidence_reward": 0.792578113079071, "sampling/batch_mean_priority_error": 0.04947610722316871, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.20277777777777778, "sampling/error_ema_max": 0.23887455463409424, "sampling/error_ema_mean": 0.009960375353693962, "sampling/priority_kl": 0.029999787360429762, "sampling/priority_scale": 0.7557832061545924, "sampling/prob_entropy": 10.278946876525879, "sampling/prob_max": 5.335809182724915e-05, "sampling/prob_min": 2.0122729256399908e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.5552000045776366, "sampling/prompt_draws_total": 46656.0, "sampling/seen_fraction": 0.869486677646637, "sampling/unseen_fraction": 0.13051332235336305, "signal/accuracy_reward/centered_abs_mean": 0.10058593600988389, "signal/accuracy_reward/group_std_mean": 0.13794552087783812, "signal/accuracy_reward/group_zero_std_frac": 0.5861111164093018, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05029296800494194, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05029296800494194, "signal/advantage_abs_mean": 0.08049516677856446, "signal/advantage_pre_scale_abs_mean": 0.08049516677856446, "signal/advantage_pre_scale_std": 0.1770169109106064, "signal/advantage_std": 0.1770169109106064, "signal/brier_reward/centered_abs_mean": 0.06743035167455673, "signal/brier_reward/group_std_mean": 0.09736959785223007, "signal/brier_reward/group_zero_std_frac": 0.4250000059604645, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.033715175837278365, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.033715175837278365, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.011990017257630825, "signal/format_reward/group_std_mean": 0.02398403510451317, "signal/format_reward/group_zero_std_frac": 0.8944444537162781, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0059950086288154125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0059950086288154125, "signal/mean_confidence_reward/centered_abs_mean": 0.049835624545812605, "signal/mean_confidence_reward/group_std_mean": 0.06977761834859848, "signal/mean_confidence_reward/group_zero_std_frac": 0.4694444477558136, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 4.983562519100815e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 4.983562519100815e-07, "step": 650 }, { "epoch": 1.5625, "eval_calibration/aurc": 0.1593088521292015, "eval_calibration/batch_distribution_entropy": 0.45063243173571826, "eval_calibration/batch_entropy_100bins": 0.2465860221535046, "eval_calibration/batch_entropy_10bins": 0.45063243173571826, "eval_calibration/batch_entropy_50bins": 0.2902770755506939, "eval_calibration/batch_uniqueness": -0.048974735640835654, "eval_calibration/confidence_entropy": 0.4098930569864548, "eval_calibration/coverage@0%": 0.0, "eval_calibration/coverage@1%": 0.0, "eval_calibration/coverage@10%": 0.0, "eval_calibration/coverage@15%": 0.7151567944250871, "eval_calibration/coverage@20%": 0.8153310104529616, "eval_calibration/coverage@25%": 0.936411149825784, "eval_calibration/coverage@30%": 1.0, "eval_calibration/coverage@5%": 0.0, "eval_calibration/distribution_entropy_10": 0.45063243173571826, "eval_calibration/distribution_entropy_100": 0.2465860221535046, "eval_calibration/ece": 0.06210801393728245, "eval_calibration/mean_confidence": 0.784233449477352, "eval_calibration/unique_confidence_per_question": 0.006944444444444444, "eval_calibration/unique_confidences": 8, "eval_completions/clipped_ratio": 0.0034722222222222285, "eval_completions/max_length": 2993.1666666666665, "eval_completions/max_terminated_length": 2993.1666666666665, "eval_completions/mean_length": 1073.7899576822917, "eval_completions/mean_terminated_length": 1077.5393676757812, "eval_completions/min_length": 175.5, "eval_completions/min_terminated_length": 332.0, "eval_loss": 0.0, "eval_num_tokens": 1690267352.0, "eval_reward": 1.2808280785878499, "eval_reward_std": 0.33280307054519653, "eval_rewards/accuracy_reward": 0.7196180621782938, "eval_rewards/brier_reward": 0.845494786898295, "eval_rewards/confidence_one_or_zero": 0.0, "eval_rewards/format_reward": 0.996527781089147, "eval_rewards/mean_confidence_reward": 0.7815104027589163, "eval_runtime": 195.6129, "eval_samples_per_second": 5.112, "eval_signal/accuracy_reward/centered_abs_mean": 0.3918728331724803, "eval_signal/accuracy_reward/group_std_mean": 0.4482126832008362, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.19593641658624014, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.19593641658624014, "eval_signal/advantage_abs_mean": 0.27938560644785565, "eval_signal/advantage_pre_scale_abs_mean": 0.27938560644785565, "eval_signal/advantage_pre_scale_std": 0.3301011423269908, "eval_signal/advantage_std": 0.3301011423269908, "eval_signal/brier_reward/centered_abs_mean": 0.19026151796181998, "eval_signal/brier_reward/group_std_mean": 0.25656550129254657, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.09513075898090999, "eval_signal/brier_reward/weight": 0.5, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.09513075898090999, "eval_signal/confidence_one_or_zero/centered_abs_mean": 0.0, "eval_signal/confidence_one_or_zero/group_std_mean": 0.0, "eval_signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "eval_signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "eval_signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/centered_abs_mean": 0.006727430348594983, "eval_signal/format_reward/group_std_mean": 0.019641855110724766, "eval_signal/format_reward/group_zero_std_frac": 0.8888889054457346, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0033637151742974916, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0033637151742974916, "eval_signal/mean_confidence_reward/centered_abs_mean": 0.16597218811511993, "eval_signal/mean_confidence_reward/group_std_mean": 0.21074802925189337, "eval_signal/mean_confidence_reward/group_zero_std_frac": 0.0, "eval_signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 1.6597218556550313e-06, "eval_signal/mean_confidence_reward/weight": 9.999999747378752e-06, "eval_signal/mean_confidence_reward/weighted_centered_abs_mean": 1.6597218556550313e-06, "eval_steps_per_second": 0.031, "step": 650 }, { "epoch": 1.5625, "step": 650, "train_probe_calibration/aurc": 0.11914299116697188, "train_probe_calibration/batch_distribution_entropy": 0.4031877624643556, "train_probe_calibration/batch_entropy_100bins": 0.22108942808918203, "train_probe_calibration/batch_entropy_10bins": 0.4031877624643556, "train_probe_calibration/batch_entropy_50bins": 0.26026289754960896, "train_probe_calibration/batch_uniqueness": -0.15853625979672398, "train_probe_calibration/confidence_entropy": 0.39954392713722525, "train_probe_calibration/coverage@0%": 0.0, "train_probe_calibration/coverage@1%": 0.0, "train_probe_calibration/coverage@10%": 0.0, "train_probe_calibration/coverage@15%": 0.8393013100436681, "train_probe_calibration/coverage@20%": 0.9502183406113537, "train_probe_calibration/coverage@25%": 1.0, "train_probe_calibration/coverage@30%": 1.0, "train_probe_calibration/coverage@5%": 0.0, "train_probe_calibration/distribution_entropy_10": 0.4031877624643556, "train_probe_calibration/distribution_entropy_100": 0.22108942808918203, "train_probe_calibration/ece": 0.03397379912663767, "train_probe_calibration/mean_confidence": 0.8000873362445416, "train_probe_calibration/unique_confidence_per_question": 0.006944444444444444, "train_probe_calibration/unique_confidences": 8, "train_probe_completions/clipped_ratio": 0.005208333333333333, "train_probe_completions/max_length": 3387.8333333333335, "train_probe_completions/max_terminated_length": 3387.8333333333335, "train_probe_completions/mean_length": 1080.039082845052, "train_probe_completions/mean_terminated_length": 1085.6224975585938, "train_probe_completions/min_length": 100.0, "train_probe_completions/min_terminated_length": 281.0, "train_probe_loss": 0.0, "train_probe_num_tokens": 1690267352.0, "train_probe_reward": 1.3141876459121704, "train_probe_reward_std": 0.31840459009011585, "train_probe_rewards/accuracy_reward": 0.7708333333333334, "train_probe_rewards/brier_reward": 0.8636024494965872, "train_probe_rewards/confidence_one_or_zero": 0.0, "train_probe_rewards/format_reward": 0.9939236144224802, "train_probe_rewards/mean_confidence_reward": 0.7952256699403127, "train_probe_runtime": 205.8787, "train_probe_samples_per_second": 4.857, "train_probe_signal/accuracy_reward/centered_abs_mean": 0.3425564269224803, "train_probe_signal/accuracy_reward/group_std_mean": 0.41801586250464123, "train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0, "train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.17127821346124014, "train_probe_signal/accuracy_reward/weight": 0.5, "train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.17127821346124014, "train_probe_signal/advantage_abs_mean": 0.25017442802588147, "train_probe_signal/advantage_pre_scale_abs_mean": 0.25017442802588147, "train_probe_signal/advantage_pre_scale_std": 0.31694228450457257, "train_probe_signal/advantage_std": 0.31694228450457257, "train_probe_signal/brier_reward/centered_abs_mean": 0.17618762950102487, "train_probe_signal/brier_reward/group_std_mean": 0.24327529221773148, "train_probe_signal/brier_reward/group_zero_std_frac": 0.0, "train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.08809381475051244, "train_probe_signal/brier_reward/weight": 0.5, "train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.08809381475051244, "train_probe_signal/confidence_one_or_zero/centered_abs_mean": 0.0, "train_probe_signal/confidence_one_or_zero/group_std_mean": 0.0, "train_probe_signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "train_probe_signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "train_probe_signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "train_probe_signal/format_reward/centered_abs_mean": 0.011773003110041222, "train_probe_signal/format_reward/group_std_mean": 0.034373246443768345, "train_probe_signal/format_reward/group_zero_std_frac": 0.8055555820465088, "train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.005886501555020611, "train_probe_signal/format_reward/weight": 0.5, "train_probe_signal/format_reward/weighted_centered_abs_mean": 0.005886501555020611, "train_probe_signal/mean_confidence_reward/centered_abs_mean": 0.15472543984651566, "train_probe_signal/mean_confidence_reward/group_std_mean": 0.20226836701234183, "train_probe_signal/mean_confidence_reward/group_zero_std_frac": 0.0, "train_probe_signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 1.5472543850592047e-06, "train_probe_signal/mean_confidence_reward/weight": 9.999999747378752e-06, "train_probe_signal/mean_confidence_reward/weighted_centered_abs_mean": 1.5472543850592047e-06, "train_probe_steps_per_second": 0.029 }, { "calibration/aurc": 0.1108042678066182, "calibration/batch_distribution_entropy": 0.42072037376674637, "calibration/batch_entropy_100bins": 0.22931455418855728, "calibration/batch_entropy_10bins": 0.42072037376674637, "calibration/batch_entropy_50bins": 0.26994538291236825, "calibration/batch_uniqueness": -0.08188383227497045, "calibration/confidence_entropy": 0.4100209457284782, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.11510416666666667, "calibration/coverage@10%": 0.5703043407310705, "calibration/coverage@15%": 0.7349573490813649, "calibration/coverage@20%": 0.8836239710873837, "calibration/coverage@25%": 0.9465157823858406, "calibration/coverage@30%": 0.9564895356666803, "calibration/coverage@5%": 0.3140625, "calibration/distribution_entropy_10": 0.42072037376674637, "calibration/distribution_entropy_100": 0.22931455418855728, "calibration/ece": 0.10658886146536148, "calibration/mean_confidence": 0.7913267767461489, "calibration/unique_confidence_per_question": 0.020833333333333332, "calibration/unique_confidences": 8.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006770833333333348, "completions/max_length": 3753.2, "completions/max_terminated_length": 3753.2, "completions/mean_length": 1044.0681518554688, "completions/mean_terminated_length": 1051.2261840820313, "completions/min_length": 0.0, "completions/min_terminated_length": 249.8, "epoch": 1.5745192307692308, "grad_norm": 0.0008380069630220532, "learning_rate": 2.3137019230769236e-06, "loss": -0.0067, "num_tokens": 1705374633.0, "reward": 1.3213003873825073, "reward_std": 0.129642491042614, "rewards/accuracy_reward": 0.7828993201255798, "rewards/brier_reward": 0.8664566040039062, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9932291626930236, "rewards/mean_confidence_reward": 0.7906076312065125, "sampling/batch_mean_priority_error": 0.052881742346931083, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.25555555555555554, "sampling/error_ema_max": 0.23887455463409424, "sampling/error_ema_mean": 0.010013969987630844, "sampling/priority_kl": 0.029999995976686476, "sampling/priority_scale": 0.7580755293602124, "sampling/prob_entropy": 10.278956985473632, "sampling/prob_max": 5.357732443371788e-05, "sampling/prob_min": 2.012997429119423e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.567199993133545, "sampling/prompt_draws_total": 47016.0, "sampling/seen_fraction": 0.8722466707229615, "sampling/unseen_fraction": 0.12775332927703859, "signal/accuracy_reward/centered_abs_mean": 0.1139485701918602, "signal/accuracy_reward/group_std_mean": 0.1550443172454834, "signal/accuracy_reward/group_zero_std_frac": 0.5444444477558136, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0569742850959301, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0569742850959301, "signal/advantage_abs_mean": 0.0928182914853096, "signal/advantage_pre_scale_abs_mean": 0.0928182914853096, "signal/advantage_pre_scale_std": 0.1907956838607788, "signal/advantage_std": 0.1907956838607788, "signal/brier_reward/centered_abs_mean": 0.07859473824501037, "signal/brier_reward/group_std_mean": 0.10812324136495591, "signal/brier_reward/group_zero_std_frac": 0.3944444477558136, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.039297369122505185, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.039297369122505185, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.01182725690305233, "signal/format_reward/group_std_mean": 0.023676491528749465, "signal/format_reward/group_zero_std_frac": 0.8972222328186035, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.005913628451526165, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.005913628451526165, "signal/mean_confidence_reward/centered_abs_mean": 0.05477920100092888, "signal/mean_confidence_reward/group_std_mean": 0.07536292672157288, "signal/mean_confidence_reward/group_zero_std_frac": 0.45833333730697634, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.477919728491543e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.477919728491543e-07, "step": 655 }, { "calibration/aurc": 0.11131981175442647, "calibration/batch_distribution_entropy": 0.41437273865924407, "calibration/batch_entropy_100bins": 0.2257903019472393, "calibration/batch_entropy_10bins": 0.41437273865924407, "calibration/batch_entropy_50bins": 0.26579669019581226, "calibration/batch_uniqueness": -0.08864372136822053, "calibration/confidence_entropy": 0.40409001678171536, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.12637075718015667, "calibration/coverage@10%": 0.53459153114999, "calibration/coverage@15%": 0.7111558835824373, "calibration/coverage@20%": 0.8782477048970255, "calibration/coverage@25%": 0.9270458882451414, "calibration/coverage@30%": 0.9323229331000228, "calibration/coverage@5%": 0.34516971279373365, "calibration/distribution_entropy_10": 0.41437273865924407, "calibration/distribution_entropy_100": 0.2257903019472393, "calibration/ece": 0.08919565324662408, "calibration/mean_confidence": 0.7744310496497983, "calibration/unique_confidence_per_question": 0.021354166666666667, "calibration/unique_confidences": 8.2, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00625, "completions/max_length": 3810.6, "completions/max_terminated_length": 3810.6, "completions/mean_length": 1064.3376220703126, "completions/mean_terminated_length": 1071.087841796875, "completions/min_length": 0.0, "completions/min_terminated_length": 260.8, "epoch": 1.5865384615384617, "grad_norm": 0.0007543113315477967, "learning_rate": 2.283653846153846e-06, "loss": -0.0074, "num_tokens": 1720727802.0, "reward": 1.3003161668777465, "reward_std": 0.11281631141901016, "rewards/accuracy_reward": 0.7461805582046509, "rewards/brier_reward": 0.8606866598129272, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9937500119209289, "rewards/mean_confidence_reward": 0.7538802027702332, "sampling/batch_mean_priority_error": 0.05363177499702718, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.20833333333333334, "sampling/error_ema_max": 0.23887455463409424, "sampling/error_ema_mean": 0.010069727897644043, "sampling/priority_kl": 0.03000015877187252, "sampling/priority_scale": 0.7605190575355664, "sampling/prob_entropy": 10.278951454162598, "sampling/prob_max": 5.380468719522469e-05, "sampling/prob_min": 2.0151718490524218e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.579199981689453, "sampling/prompt_draws_total": 47376.0, "sampling/seen_fraction": 0.875, "sampling/unseen_fraction": 0.125, "signal/accuracy_reward/centered_abs_mean": 0.10485026091337205, "signal/accuracy_reward/group_std_mean": 0.14478610157966615, "signal/accuracy_reward/group_zero_std_frac": 0.5555555582046509, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05242513045668602, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05242513045668602, "signal/advantage_abs_mean": 0.0782653659582138, "signal/advantage_pre_scale_abs_mean": 0.0782653659582138, "signal/advantage_pre_scale_std": 0.1688857764005661, "signal/advantage_std": 0.1688857764005661, "signal/brier_reward/centered_abs_mean": 0.0728747896850109, "signal/brier_reward/group_std_mean": 0.10191280543804168, "signal/brier_reward/group_zero_std_frac": 0.375, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03643739484250545, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03643739484250545, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.010872395848855377, "signal/format_reward/group_std_mean": 0.022299006581306458, "signal/format_reward/group_zero_std_frac": 0.9, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.005436197924427688, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.005436197924427688, "signal/mean_confidence_reward/centered_abs_mean": 0.0543424591422081, "signal/mean_confidence_reward/group_std_mean": 0.07400039434432984, "signal/mean_confidence_reward/group_zero_std_frac": 0.42777777910232545, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.434245792912407e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.434245792912407e-07, "step": 660 }, { "calibration/aurc": 0.1159017932887628, "calibration/batch_distribution_entropy": 0.46562924757054425, "calibration/batch_entropy_100bins": 0.2589695955178345, "calibration/batch_entropy_10bins": 0.46562924757054425, "calibration/batch_entropy_50bins": 0.3048548177506447, "calibration/batch_uniqueness": 0.05487002083086965, "calibration/confidence_entropy": 0.4198923602522945, "calibration/coverage@0%": 0.1328125, "calibration/coverage@1%": 0.13385416666666666, "calibration/coverage@10%": 0.6289555662145125, "calibration/coverage@15%": 0.6962895778364115, "calibration/coverage@20%": 0.7218997361477573, "calibration/coverage@25%": 0.8915332440011603, "calibration/coverage@30%": 0.9278412465982401, "calibration/coverage@5%": 0.4299216502074764, "calibration/distribution_entropy_10": 0.46562924757054425, "calibration/distribution_entropy_100": 0.2589695955178345, "calibration/ece": 0.1093648627357036, "calibration/mean_confidence": 0.7479002819167239, "calibration/unique_confidence_per_question": 0.021875, "calibration/unique_confidences": 8.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.004947916666666652, "completions/max_length": 3819.2, "completions/max_terminated_length": 3819.2, "completions/mean_length": 1034.9790893554687, "completions/mean_terminated_length": 1040.2152709960938, "completions/min_length": 0.0, "completions/min_terminated_length": 284.6, "epoch": 1.5985576923076923, "grad_norm": 0.0005559017299674451, "learning_rate": 2.2536057692307694e-06, "loss": -0.0048, "num_tokens": 1735753353.0, "reward": 1.3110567331314087, "reward_std": 0.11565322428941727, "rewards/accuracy_reward": 0.7572048664093017, "rewards/brier_reward": 0.86984201669693, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9950520873069764, "rewards/mean_confidence_reward": 0.7291840076446533, "sampling/batch_mean_priority_error": 0.041489098082864787, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.18055555555555552, "sampling/error_ema_max": 0.23887455463409424, "sampling/error_ema_mean": 0.010115215368568898, "sampling/priority_kl": 0.02999992445111275, "sampling/priority_scale": 0.7622721730964258, "sampling/prob_entropy": 10.278946685791016, "sampling/prob_max": 5.400420050136745e-05, "sampling/prob_min": 2.0180710271233694e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.5911999940872192, "sampling/prompt_draws_total": 47736.0, "sampling/seen_fraction": 0.8773200035095214, "sampling/unseen_fraction": 0.12267999649047852, "signal/accuracy_reward/centered_abs_mean": 0.12019856721162796, "signal/accuracy_reward/group_std_mean": 0.15842821300029755, "signal/accuracy_reward/group_zero_std_frac": 0.547222226858139, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06009928360581398, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06009928360581398, "signal/advantage_abs_mean": 0.08310452848672867, "signal/advantage_pre_scale_abs_mean": 0.08310452848672867, "signal/advantage_pre_scale_std": 0.1731024533510208, "signal/advantage_std": 0.1731024533510208, "signal/brier_reward/centered_abs_mean": 0.07321913093328476, "signal/brier_reward/group_std_mean": 0.10007321387529373, "signal/brier_reward/group_zero_std_frac": 0.33333333730697634, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03660956546664238, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03660956546664238, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.009087456623092294, "signal/format_reward/group_std_mean": 0.01993839703500271, "signal/format_reward/group_zero_std_frac": 0.9083333253860474, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.004543728311546147, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.004543728311546147, "signal/mean_confidence_reward/centered_abs_mean": 0.05739041939377785, "signal/mean_confidence_reward/group_std_mean": 0.07699377685785294, "signal/mean_confidence_reward/group_zero_std_frac": 0.3777777850627899, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.739041739616368e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.739041739616368e-07, "step": 665 }, { "calibration/aurc": 0.11042005183808701, "calibration/batch_distribution_entropy": 0.5913382170007677, "calibration/batch_entropy_100bins": 0.32877258800773546, "calibration/batch_entropy_10bins": 0.5913382170007677, "calibration/batch_entropy_50bins": 0.38702577110679987, "calibration/batch_uniqueness": 0.34693221792977613, "calibration/confidence_entropy": 0.45987446598557097, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.12180851063829787, "calibration/coverage@10%": 0.3584779722885766, "calibration/coverage@15%": 0.7167577612497782, "calibration/coverage@20%": 0.8871506519359545, "calibration/coverage@25%": 0.9662269129287597, "calibration/coverage@30%": 0.9662269129287597, "calibration/coverage@5%": 0.3091463958020851, "calibration/distribution_entropy_10": 0.5913382170007677, "calibration/distribution_entropy_100": 0.32877258800773546, "calibration/ece": 0.14242507122515471, "calibration/mean_confidence": 0.6710677370417025, "calibration/unique_confidence_per_question": 0.022916666666666665, "calibration/unique_confidences": 8.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010677083333333327, "completions/max_length": 3976.8, "completions/max_terminated_length": 3976.8, "completions/mean_length": 1062.3335083007812, "completions/mean_terminated_length": 1073.8999389648438, "completions/min_length": 0.0, "completions/min_terminated_length": 208.4, "epoch": 1.6105769230769231, "grad_norm": 0.0004333420074544847, "learning_rate": 2.2235576923076924e-06, "loss": -0.0123, "num_tokens": 1751071755.0, "reward": 1.285237693786621, "reward_std": 0.11045846790075302, "rewards/accuracy_reward": 0.7262152910232544, "rewards/brier_reward": 0.8550104260444641, "rewards/confidence_one_or_zero": 8.680555620230735e-05, "rewards/format_reward": 0.9892361044883728, "rewards/mean_confidence_reward": 0.6805728912353516, "sampling/batch_mean_priority_error": 0.038647688258623844, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.20833333333333334, "sampling/error_ema_max": 0.23887455463409424, "sampling/error_ema_mean": 0.010153127089142799, "sampling/priority_kl": 0.030000197887420654, "sampling/priority_scale": 0.7641624867683277, "sampling/prob_entropy": 10.278944778442384, "sampling/prob_max": 5.420961242634803e-05, "sampling/prob_min": 2.0208075875416397e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.6032000064849854, "sampling/prompt_draws_total": 48096.0, "sampling/seen_fraction": 0.8796666622161865, "sampling/unseen_fraction": 0.12033333778381347, "signal/accuracy_reward/centered_abs_mean": 0.11707899421453476, "signal/accuracy_reward/group_std_mean": 0.15186972320079803, "signal/accuracy_reward/group_zero_std_frac": 0.5694444477558136, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05853949710726738, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05853949710726738, "signal/advantage_abs_mean": 0.07833757400512695, "signal/advantage_pre_scale_abs_mean": 0.07833757400512695, "signal/advantage_pre_scale_std": 0.1668864071369171, "signal/advantage_std": 0.1668864071369171, "signal/brier_reward/centered_abs_mean": 0.0744941309094429, "signal/brier_reward/group_std_mean": 0.10006592720746994, "signal/brier_reward/group_zero_std_frac": 0.3027777850627899, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03724706545472145, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03724706545472145, "signal/confidence_one_or_zero/centered_abs_mean": 0.0001681857625953853, "signal/confidence_one_or_zero/group_std_mean": 0.0004910463467240334, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9972222208976745, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.6818574977151001e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.6818574977151001e-09, "signal/format_reward/centered_abs_mean": 0.017892795242369176, "signal/format_reward/group_std_mean": 0.03308853507041931, "signal/format_reward/group_zero_std_frac": 0.8638888835906983, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.008946397621184588, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.008946397621184588, "signal/mean_confidence_reward/centered_abs_mean": 0.05739258825778961, "signal/mean_confidence_reward/group_std_mean": 0.07705257311463357, "signal/mean_confidence_reward/group_zero_std_frac": 0.32222222685813906, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.73925871094616e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.73925871094616e-07, "step": 670 }, { "calibration/aurc": 0.10770899593223338, "calibration/batch_distribution_entropy": 0.6491505336506754, "calibration/batch_entropy_100bins": 0.36866915529152067, "calibration/batch_entropy_10bins": 0.6491505336506754, "calibration/batch_entropy_50bins": 0.43399136459222154, "calibration/batch_uniqueness": 0.4690463544560851, "calibration/confidence_entropy": 0.48343109218125313, "calibration/coverage@0%": 0.1550098340674257, "calibration/coverage@1%": 0.1550098340674257, "calibration/coverage@10%": 0.5382968708318092, "calibration/coverage@15%": 0.7641439630800948, "calibration/coverage@20%": 0.8426342540515769, "calibration/coverage@25%": 0.9148564762737991, "calibration/coverage@30%": 0.9496062992125985, "calibration/coverage@5%": 0.368785838942907, "calibration/distribution_entropy_10": 0.6491505336506754, "calibration/distribution_entropy_100": 0.36866915529152067, "calibration/ece": 0.12161472797602957, "calibration/mean_confidence": 0.6472128889170813, "calibration/unique_confidence_per_question": 0.021875, "calibration/unique_confidences": 8.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.007465277777777768, "completions/max_length": 3823.4, "completions/max_terminated_length": 3823.4, "completions/mean_length": 1071.7802368164062, "completions/mean_terminated_length": 1079.799658203125, "completions/min_length": 0.0, "completions/min_terminated_length": 252.0, "epoch": 1.6225961538461537, "grad_norm": 0.00043507353984750807, "learning_rate": 2.1935096153846157e-06, "loss": -0.0083, "num_tokens": 1766519559.0, "reward": 1.2930452346801757, "reward_std": 0.09925612062215805, "rewards/accuracy_reward": 0.7403645873069763, "rewards/brier_reward": 0.8531779527664185, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9925347328186035, "rewards/mean_confidence_reward": 0.6609287977218627, "sampling/batch_mean_priority_error": 0.04003342984423544, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.16388888888888892, "sampling/error_ema_max": 0.23887455463409424, "sampling/error_ema_mean": 0.010190530680119991, "sampling/priority_kl": 0.02999986708164215, "sampling/priority_scale": 0.7658457219367847, "sampling/prob_entropy": 10.278948974609374, "sampling/prob_max": 5.440502281999216e-05, "sampling/prob_min": 2.023689194174949e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.6151999950408935, "sampling/prompt_draws_total": 48456.0, "sampling/seen_fraction": 0.8818799972534179, "sampling/unseen_fraction": 0.11812000274658203, "signal/accuracy_reward/centered_abs_mean": 0.10931532233953475, "signal/accuracy_reward/group_std_mean": 0.14885615557432175, "signal/accuracy_reward/group_zero_std_frac": 0.55277778506279, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05465766116976738, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05465766116976738, "signal/advantage_abs_mean": 0.06904994621872902, "signal/advantage_pre_scale_abs_mean": 0.06904994621872902, "signal/advantage_pre_scale_std": 0.15002830922603608, "signal/advantage_std": 0.15002830922603608, "signal/brier_reward/centered_abs_mean": 0.06792017593979835, "signal/brier_reward/group_std_mean": 0.09253233224153519, "signal/brier_reward/group_zero_std_frac": 0.25, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.033960087969899175, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.033960087969899175, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.01263020820915699, "signal/format_reward/group_std_mean": 0.02473071329295635, "signal/format_reward/group_zero_std_frac": 0.8944444417953491, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.006315104104578495, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.006315104104578495, "signal/mean_confidence_reward/centered_abs_mean": 0.05845487043261528, "signal/mean_confidence_reward/group_std_mean": 0.07675684094429017, "signal/mean_confidence_reward/group_zero_std_frac": 0.2694444447755814, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.845486839461955e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.845486839461955e-07, "step": 675 }, { "calibration/aurc": 0.09314557605787317, "calibration/batch_distribution_entropy": 0.6623083902603728, "calibration/batch_entropy_100bins": 0.3666467877218496, "calibration/batch_entropy_10bins": 0.6623083902603728, "calibration/batch_entropy_50bins": 0.4316106662108378, "calibration/batch_uniqueness": 0.49281141493055564, "calibration/confidence_entropy": 0.4771051935748657, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.5671875, "calibration/coverage@15%": 0.8802083333333333, "calibration/coverage@20%": 0.9526041666666666, "calibration/coverage@25%": 0.9796875, "calibration/coverage@30%": 1.0, "calibration/coverage@5%": 0.31927083333333334, "calibration/distribution_entropy_10": 0.6623083902603728, "calibration/distribution_entropy_100": 0.3666467877218496, "calibration/ece": 0.16416666666666666, "calibration/mean_confidence": 0.68875, "calibration/unique_confidence_per_question": 0.021875, "calibration/unique_confidences": 8.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0073784722222222324, "completions/max_length": 3977.4, "completions/max_terminated_length": 3977.4, "completions/mean_length": 1054.840185546875, "completions/mean_terminated_length": 1062.661328125, "completions/min_length": 0.0, "completions/min_terminated_length": 238.8, "epoch": 1.6346153846153846, "grad_norm": 0.0005089513724669814, "learning_rate": 2.1634615384615387e-06, "loss": -0.0086, "num_tokens": 1781786774.0, "reward": 1.3072033882141114, "reward_std": 0.09771093726158142, "rewards/accuracy_reward": 0.7621527791023255, "rewards/brier_reward": 0.8596189141273498, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9926215291023255, "rewards/mean_confidence_reward": 0.6796440839767456, "sampling/batch_mean_priority_error": 0.03946241023398036, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.18611111111111112, "sampling/error_ema_max": 0.23887455463409424, "sampling/error_ema_mean": 0.010225803218781948, "sampling/priority_kl": 0.029999976232647895, "sampling/priority_scale": 0.7673097312217578, "sampling/prob_entropy": 10.278958702087403, "sampling/prob_max": 5.459113890537992e-05, "sampling/prob_min": 2.0267764557502233e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.6272000074386597, "sampling/prompt_draws_total": 48816.0, "sampling/seen_fraction": 0.8839199900627136, "sampling/unseen_fraction": 0.11608000993728637, "signal/accuracy_reward/centered_abs_mean": 0.09992404282093048, "signal/accuracy_reward/group_std_mean": 0.1337385058403015, "signal/accuracy_reward/group_zero_std_frac": 0.6166666746139526, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04996202141046524, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04996202141046524, "signal/advantage_abs_mean": 0.06958887130022048, "signal/advantage_pre_scale_abs_mean": 0.06958887130022048, "signal/advantage_pre_scale_std": 0.1529184877872467, "signal/advantage_std": 0.1529184877872467, "signal/brier_reward/centered_abs_mean": 0.06304295957088471, "signal/brier_reward/group_std_mean": 0.08608659505844116, "signal/brier_reward/group_zero_std_frac": 0.2888888955116272, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.031521479785442355, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.031521479785442355, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.012353515625, "signal/format_reward/group_std_mean": 0.023333887942135333, "signal/format_reward/group_zero_std_frac": 0.9055555582046508, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0061767578125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0061767578125, "signal/mean_confidence_reward/centered_abs_mean": 0.0535801962018013, "signal/mean_confidence_reward/group_std_mean": 0.07002054750919343, "signal/mean_confidence_reward/group_zero_std_frac": 0.30277777910232545, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.358019279810832e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.358019279810832e-07, "step": 680 }, { "calibration/aurc": 0.1198504663022937, "calibration/batch_distribution_entropy": 0.5066540096082259, "calibration/batch_entropy_100bins": 0.2788550145350276, "calibration/batch_entropy_10bins": 0.5066540096082259, "calibration/batch_entropy_50bins": 0.32826361127430026, "calibration/batch_uniqueness": 0.1365229156710747, "calibration/confidence_entropy": 0.440284206232496, "calibration/coverage@0%": 0.11614583333333332, "calibration/coverage@1%": 0.12135416666666668, "calibration/coverage@10%": 0.4124090608465608, "calibration/coverage@15%": 0.7188620071684587, "calibration/coverage@20%": 0.8839299048472435, "calibration/coverage@25%": 0.9078125, "calibration/coverage@30%": 0.94375, "calibration/coverage@5%": 0.2625, "calibration/distribution_entropy_10": 0.5066540096082259, "calibration/distribution_entropy_100": 0.2788550145350276, "calibration/ece": 0.10422848395630649, "calibration/mean_confidence": 0.7424086341525858, "calibration/unique_confidence_per_question": 0.020833333333333332, "calibration/unique_confidences": 8.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00590277777777779, "completions/max_length": 3630.6, "completions/max_terminated_length": 3630.6, "completions/mean_length": 1009.5178955078125, "completions/mean_terminated_length": 1015.4686645507812, "completions/min_length": 0.0, "completions/min_terminated_length": 224.0, "epoch": 1.6466346153846154, "grad_norm": 0.0005279097822494805, "learning_rate": 2.1334134615384616e-06, "loss": -0.0054, "num_tokens": 1796476292.0, "reward": 1.3186150312423706, "reward_std": 0.10911149829626084, "rewards/accuracy_reward": 0.7713541626930237, "rewards/brier_reward": 0.8717638969421386, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9940972328186035, "rewards/mean_confidence_reward": 0.7347743034362793, "sampling/batch_mean_priority_error": 0.035970672211721816, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.16111111111111112, "sampling/error_ema_max": 0.23887455463409424, "sampling/error_ema_mean": 0.010261932387948036, "sampling/priority_kl": 0.029999835789203642, "sampling/priority_scale": 0.7691675721900537, "sampling/prob_entropy": 10.278951835632324, "sampling/prob_max": 5.479661704157479e-05, "sampling/prob_min": 2.0295308058848604e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.6392000198364258, "sampling/prompt_draws_total": 49176.0, "sampling/seen_fraction": 0.8860866785049438, "sampling/unseen_fraction": 0.11391332149505615, "signal/accuracy_reward/centered_abs_mean": 0.10825737714767455, "signal/accuracy_reward/group_std_mean": 0.14899571537971495, "signal/accuracy_reward/group_zero_std_frac": 0.55277778506279, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05412868857383728, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05412868857383728, "signal/advantage_abs_mean": 0.07697880044579505, "signal/advantage_pre_scale_abs_mean": 0.07697880044579505, "signal/advantage_pre_scale_std": 0.16402750015258788, "signal/advantage_std": 0.16402750015258788, "signal/brier_reward/centered_abs_mean": 0.06315912455320358, "signal/brier_reward/group_std_mean": 0.08676011264324188, "signal/brier_reward/group_zero_std_frac": 0.325, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03157956227660179, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03157956227660179, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.010112847201526165, "signal/format_reward/group_std_mean": 0.020026170834898948, "signal/format_reward/group_zero_std_frac": 0.9138889074325561, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0050564236007630825, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0050564236007630825, "signal/mean_confidence_reward/centered_abs_mean": 0.056768130511045456, "signal/mean_confidence_reward/group_std_mean": 0.07474597916007042, "signal/mean_confidence_reward/group_zero_std_frac": 0.3555555522441864, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.676812747879012e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.676812747879012e-07, "step": 685 }, { "calibration/aurc": 0.15312435512897443, "calibration/batch_distribution_entropy": 0.5166774911978564, "calibration/batch_entropy_100bins": 0.2830634113269603, "calibration/batch_entropy_10bins": 0.5166774911978564, "calibration/batch_entropy_50bins": 0.33321766788647383, "calibration/batch_uniqueness": 0.13921276205743438, "calibration/confidence_entropy": 0.4343256009444184, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.1671875, "calibration/coverage@10%": 0.49479173193536485, "calibration/coverage@15%": 0.506932057895946, "calibration/coverage@20%": 0.5360039544128602, "calibration/coverage@25%": 0.5819628647214854, "calibration/coverage@30%": 0.8712576153176675, "calibration/coverage@5%": 0.4500052214958542, "calibration/distribution_entropy_10": 0.5166774911978564, "calibration/distribution_entropy_100": 0.2830634113269603, "calibration/ece": 0.14271067610468618, "calibration/mean_confidence": 0.7531193116142556, "calibration/unique_confidence_per_question": 0.021354166666666667, "calibration/unique_confidences": 8.2, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.005642361111111094, "completions/max_length": 3827.8, "completions/max_terminated_length": 3827.8, "completions/mean_length": 974.21796875, "completions/mean_terminated_length": 979.8821655273438, "completions/min_length": 0.0, "completions/min_terminated_length": 238.0, "epoch": 1.6586538461538463, "grad_norm": 0.0005919505492784083, "learning_rate": 2.103365384615385e-06, "loss": -0.003, "num_tokens": 1810795411.0, "reward": 1.2960154294967652, "reward_std": 0.10875263512134552, "rewards/accuracy_reward": 0.7316840171813965, "rewards/brier_reward": 0.865973949432373, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9943576574325561, "rewards/mean_confidence_reward": 0.7592534780502319, "sampling/batch_mean_priority_error": 0.046497556586661125, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.18055555555555555, "sampling/error_ema_max": 0.23887455463409424, "sampling/error_ema_mean": 0.010300593636929988, "sampling/priority_kl": 0.029999887570738792, "sampling/priority_scale": 0.7711004435783252, "sampling/prob_entropy": 10.278952980041504, "sampling/prob_max": 5.5005505419103426e-05, "sampling/prob_min": 2.032188313023653e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.651200008392334, "sampling/prompt_draws_total": 49536.0, "sampling/seen_fraction": 0.8882599949836731, "sampling/unseen_fraction": 0.1117400050163269, "signal/accuracy_reward/centered_abs_mean": 0.10711262971162797, "signal/accuracy_reward/group_std_mean": 0.14350715726614, "signal/accuracy_reward/group_zero_std_frac": 0.5833333492279053, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05355631485581398, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05355631485581398, "signal/advantage_abs_mean": 0.07843385115265847, "signal/advantage_pre_scale_abs_mean": 0.07843385115265847, "signal/advantage_pre_scale_std": 0.16597414314746856, "signal/advantage_std": 0.16597414314746856, "signal/brier_reward/centered_abs_mean": 0.06248611286282539, "signal/brier_reward/group_std_mean": 0.08682104349136352, "signal/brier_reward/group_zero_std_frac": 0.3777777850627899, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.031243056431412695, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.031243056431412695, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.00728624127805233, "signal/format_reward/group_std_mean": 0.014419934712350368, "signal/format_reward/group_zero_std_frac": 0.9388888955116272, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.003643120639026165, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.003643120639026165, "signal/mean_confidence_reward/centered_abs_mean": 0.052331282198429106, "signal/mean_confidence_reward/group_std_mean": 0.06940097361803055, "signal/mean_confidence_reward/group_zero_std_frac": 0.4111111104488373, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.233128092640982e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.233128092640982e-07, "step": 690 }, { "calibration/aurc": 0.1597328193560566, "calibration/batch_distribution_entropy": 0.48849890794341916, "calibration/batch_entropy_100bins": 0.2646663572293887, "calibration/batch_entropy_10bins": 0.48849890794341916, "calibration/batch_entropy_50bins": 0.31156095346465396, "calibration/batch_uniqueness": 0.06236602230248702, "calibration/confidence_entropy": 0.4200712640087535, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.2, "calibration/coverage@15%": 0.3395833333333333, "calibration/coverage@20%": 0.8581456701479546, "calibration/coverage@25%": 0.954569190600522, "calibration/coverage@30%": 0.9765013054830287, "calibration/coverage@5%": 0.0, "calibration/distribution_entropy_10": 0.48849890794341916, "calibration/distribution_entropy_100": 0.2646663572293887, "calibration/ece": 0.11059222693646661, "calibration/mean_confidence": 0.785867194299391, "calibration/unique_confidence_per_question": 0.0203125, "calibration/unique_confidences": 7.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.003472222222222232, "completions/max_length": 3532.2, "completions/max_terminated_length": 3532.2, "completions/mean_length": 981.27109375, "completions/mean_terminated_length": 984.7066528320313, "completions/min_length": 0.0, "completions/min_terminated_length": 239.4, "epoch": 1.6706730769230769, "grad_norm": 0.000510729500092566, "learning_rate": 2.073317307692308e-06, "loss": -0.0034, "num_tokens": 1825216070.0, "reward": 1.3108274698257447, "reward_std": 0.10949984639883041, "rewards/accuracy_reward": 0.7611111164093017, "rewards/brier_reward": 0.8640008687973022, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9965277910232544, "rewards/mean_confidence_reward": 0.7624218702316284, "sampling/batch_mean_priority_error": 0.04515728146589985, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.19444444444444448, "sampling/error_ema_max": 0.23887455463409424, "sampling/error_ema_mean": 0.010345915518701076, "sampling/priority_kl": 0.03000008873641491, "sampling/priority_scale": 0.7729870260460302, "sampling/prob_entropy": 10.278949356079101, "sampling/prob_max": 5.521025886991993e-05, "sampling/prob_min": 2.0347893951111473e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.6631999969482423, "sampling/prompt_draws_total": 49896.0, "sampling/seen_fraction": 0.8903666734695435, "sampling/unseen_fraction": 0.10963332653045654, "signal/accuracy_reward/centered_abs_mean": 0.10617404580116271, "signal/accuracy_reward/group_std_mean": 0.14624143689870833, "signal/accuracy_reward/group_zero_std_frac": 0.5611111223697662, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05308702290058136, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05308702290058136, "signal/advantage_abs_mean": 0.0775064080953598, "signal/advantage_pre_scale_abs_mean": 0.0775064080953598, "signal/advantage_pre_scale_std": 0.16172997355461122, "signal/advantage_std": 0.16172997355461122, "signal/brier_reward/centered_abs_mean": 0.06556255593895913, "signal/brier_reward/group_std_mean": 0.09077477753162384, "signal/brier_reward/group_zero_std_frac": 0.37500000596046446, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.032781277969479564, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.032781277969479564, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.006076388945803046, "signal/format_reward/group_std_mean": 0.011436474788933993, "signal/format_reward/group_zero_std_frac": 0.9527777791023254, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.003038194472901523, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.003038194472901523, "signal/mean_confidence_reward/centered_abs_mean": 0.04954320043325424, "signal/mean_confidence_reward/group_std_mean": 0.06611797288060188, "signal/mean_confidence_reward/group_zero_std_frac": 0.425, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 4.954319933858642e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 4.954319933858642e-07, "step": 695 }, { "calibration/aurc": 0.17845096277736253, "calibration/batch_distribution_entropy": 0.507448634925201, "calibration/batch_entropy_100bins": 0.27393716460227274, "calibration/batch_entropy_10bins": 0.507448634925201, "calibration/batch_entropy_50bins": 0.32247439790360655, "calibration/batch_uniqueness": 0.1000203556561402, "calibration/confidence_entropy": 0.4250286446102992, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.1671875, "calibration/coverage@10%": 0.1875, "calibration/coverage@15%": 0.37232375979112275, "calibration/coverage@20%": 0.5614093777197563, "calibration/coverage@25%": 0.7432210073977371, "calibration/coverage@30%": 0.9024490248226951, "calibration/coverage@5%": 0.17291666666666666, "calibration/distribution_entropy_10": 0.507448634925201, "calibration/distribution_entropy_100": 0.27393716460227274, "calibration/ece": 0.10796262186822965, "calibration/mean_confidence": 0.769326342402274, "calibration/unique_confidence_per_question": 0.021875, "calibration/unique_confidences": 8.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00277777777777779, "completions/max_length": 3776.8, "completions/max_terminated_length": 3776.8, "completions/mean_length": 1013.50947265625, "completions/mean_terminated_length": 1016.4000854492188, "completions/min_length": 0.0, "completions/min_terminated_length": 249.0, "epoch": 1.6826923076923077, "grad_norm": 0.0004915227764286101, "learning_rate": 2.043269230769231e-06, "loss": -0.001, "num_tokens": 1840023155.0, "reward": 1.2889822721481323, "reward_std": 0.10611613094806671, "rewards/accuracy_reward": 0.7234375, "rewards/brier_reward": 0.8572899460792541, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9972222208976745, "rewards/mean_confidence_reward": 0.748107624053955, "sampling/batch_mean_priority_error": 0.04979654861793659, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.175, "sampling/error_ema_max": 0.23887455463409424, "sampling/error_ema_mean": 0.01039465218782425, "sampling/priority_kl": 0.029999758675694467, "sampling/priority_scale": 0.77528671619948, "sampling/prob_entropy": 10.278947257995606, "sampling/prob_max": 5.543693623621948e-05, "sampling/prob_min": 2.0364622105262243e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.6751999855041504, "sampling/prompt_draws_total": 50256.0, "sampling/seen_fraction": 0.8926133275032043, "sampling/unseen_fraction": 0.10738667249679565, "signal/accuracy_reward/centered_abs_mean": 0.11032986044883727, "signal/accuracy_reward/group_std_mean": 0.14479590356349945, "signal/accuracy_reward/group_zero_std_frac": 0.5888888835906982, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05516493022441864, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05516493022441864, "signal/advantage_abs_mean": 0.07801882773637772, "signal/advantage_pre_scale_abs_mean": 0.07801882773637772, "signal/advantage_pre_scale_std": 0.1612319380044937, "signal/advantage_std": 0.1612319380044937, "signal/brier_reward/centered_abs_mean": 0.06465163081884384, "signal/brier_reward/group_std_mean": 0.08930426687002183, "signal/brier_reward/group_zero_std_frac": 0.3027777761220932, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03232581540942192, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03232581540942192, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.005088975746184587, "signal/format_reward/group_std_mean": 0.010972938034683466, "signal/format_reward/group_zero_std_frac": 0.950000011920929, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0025444878730922936, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0025444878730922936, "signal/mean_confidence_reward/centered_abs_mean": 0.053876417130231856, "signal/mean_confidence_reward/group_std_mean": 0.0717897079885006, "signal/mean_confidence_reward/group_zero_std_frac": 0.3305555611848831, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.387641522247577e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.387641522247577e-07, "step": 700 }, { "epoch": 1.6826923076923077, "eval_calibration/aurc": 0.1281310126651566, "eval_calibration/batch_distribution_entropy": 0.6122337191613557, "eval_calibration/batch_entropy_100bins": 0.3384483209155802, "eval_calibration/batch_entropy_10bins": 0.6122337191613557, "eval_calibration/batch_entropy_50bins": 0.3984158873338679, "eval_calibration/batch_uniqueness": 0.3394313964103316, "eval_calibration/confidence_entropy": 0.45993146248134936, "eval_calibration/coverage@0%": 0.0, "eval_calibration/coverage@1%": 0.0, "eval_calibration/coverage@10%": 0.6172624237140366, "eval_calibration/coverage@15%": 0.7314734088927637, "eval_calibration/coverage@20%": 0.8134263295553618, "eval_calibration/coverage@25%": 0.9145597210113339, "eval_calibration/coverage@30%": 1.0, "eval_calibration/coverage@5%": 0.0, "eval_calibration/distribution_entropy_10": 0.6122337191613557, "eval_calibration/distribution_entropy_100": 0.3384483209155802, "eval_calibration/ece": 0.027114210985178678, "eval_calibration/mean_confidence": 0.7259808195292067, "eval_calibration/unique_confidence_per_question": 0.0078125, "eval_calibration/unique_confidences": 9, "eval_completions/clipped_ratio": 0.00434027777777779, "eval_completions/max_length": 2841.1666666666665, "eval_completions/max_terminated_length": 2841.1666666666665, "eval_completions/mean_length": 965.6305847167969, "eval_completions/mean_terminated_length": 969.8583780924479, "eval_completions/min_length": 118.66666666666667, "eval_completions/min_terminated_length": 306.3333333333333, "eval_loss": 0.0, "eval_num_tokens": 1840023155.0, "eval_reward": 1.2856452663739522, "eval_reward_std": 0.3019138028224309, "eval_rewards/accuracy_reward": 0.71875, "eval_rewards/brier_reward": 0.8568663497765859, "eval_rewards/confidence_one_or_zero": 0.0, "eval_rewards/format_reward": 0.9956597288449606, "eval_rewards/mean_confidence_reward": 0.7228298286596934, "eval_runtime": 187.0543, "eval_samples_per_second": 5.346, "eval_signal/accuracy_reward/centered_abs_mean": 0.3912760466337204, "eval_signal/accuracy_reward/group_std_mean": 0.447576567530632, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1956380233168602, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.1956380233168602, "eval_signal/advantage_abs_mean": 0.25326357036828995, "eval_signal/advantage_pre_scale_abs_mean": 0.25326357036828995, "eval_signal/advantage_pre_scale_std": 0.30079903701941174, "eval_signal/advantage_std": 0.30079903701941174, "eval_signal/brier_reward/centered_abs_mean": 0.15465549876292548, "eval_signal/brier_reward/group_std_mean": 0.2081133077541987, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.07732774938146274, "eval_signal/brier_reward/weight": 0.5, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.07732774938146274, "eval_signal/confidence_one_or_zero/centered_abs_mean": 0.0, "eval_signal/confidence_one_or_zero/group_std_mean": 0.0, "eval_signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "eval_signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "eval_signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/centered_abs_mean": 0.008409287935743729, "eval_signal/format_reward/group_std_mean": 0.02455231888840596, "eval_signal/format_reward/group_zero_std_frac": 0.8611111342906952, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.004204643967871864, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.004204643967871864, "eval_signal/mean_confidence_reward/centered_abs_mean": 0.19646265357732773, "eval_signal/mean_confidence_reward/group_std_mean": 0.22983714193105698, "eval_signal/mean_confidence_reward/group_zero_std_frac": 0.0, "eval_signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 1.9646265248714676e-06, "eval_signal/mean_confidence_reward/weight": 9.999999747378752e-06, "eval_signal/mean_confidence_reward/weighted_centered_abs_mean": 1.9646265248714676e-06, "eval_steps_per_second": 0.032, "step": 700 }, { "epoch": 1.6826923076923077, "step": 700, "train_probe_calibration/aurc": 0.09242685611624851, "train_probe_calibration/batch_distribution_entropy": 0.5871877959379022, "train_probe_calibration/batch_entropy_100bins": 0.3208042085735073, "train_probe_calibration/batch_entropy_10bins": 0.5871877959379022, "train_probe_calibration/batch_entropy_50bins": 0.3776455237641256, "train_probe_calibration/batch_uniqueness": 0.28527984253508726, "train_probe_calibration/confidence_entropy": 0.45388621485494296, "train_probe_calibration/coverage@0%": 0.0, "train_probe_calibration/coverage@1%": 0.0, "train_probe_calibration/coverage@10%": 0.6966783216783217, "train_probe_calibration/coverage@15%": 0.8277972027972028, "train_probe_calibration/coverage@20%": 0.9388111888111889, "train_probe_calibration/coverage@25%": 1.0, "train_probe_calibration/coverage@30%": 1.0, "train_probe_calibration/coverage@5%": 0.0, "train_probe_calibration/distribution_entropy_10": 0.5871877959379022, "train_probe_calibration/distribution_entropy_100": 0.3208042085735073, "train_probe_calibration/ece": 0.05061188811188807, "train_probe_calibration/mean_confidence": 0.7408216783216783, "train_probe_calibration/unique_confidence_per_question": 0.0078125, "train_probe_calibration/unique_confidences": 9, "train_probe_completions/clipped_ratio": 0.006944444444444457, "train_probe_completions/max_length": 3206.6666666666665, "train_probe_completions/max_terminated_length": 3206.6666666666665, "train_probe_completions/mean_length": 970.924326578776, "train_probe_completions/mean_terminated_length": 977.7750854492188, "train_probe_completions/min_length": 49.333333333333336, "train_probe_completions/min_terminated_length": 250.33333333333334, "train_probe_loss": 0.0, "train_probe_num_tokens": 1840023155.0, "train_probe_reward": 1.3135360479354858, "train_probe_reward_std": 0.2896604041258494, "train_probe_rewards/accuracy_reward": 0.7638888855775198, "train_probe_rewards/brier_reward": 0.8701128363609314, "train_probe_rewards/confidence_one_or_zero": 0.0, "train_probe_rewards/format_reward": 0.9930555621782938, "train_probe_rewards/mean_confidence_reward": 0.7356770634651184, "train_probe_runtime": 201.7941, "train_probe_samples_per_second": 4.956, "train_probe_signal/accuracy_reward/centered_abs_mean": 0.3512369791666667, "train_probe_signal/accuracy_reward/group_std_mean": 0.42336805164813995, "train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0, "train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.17561848958333334, "train_probe_signal/accuracy_reward/weight": 0.5, "train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.17561848958333334, "train_probe_signal/advantage_abs_mean": 0.23088564723730087, "train_probe_signal/advantage_pre_scale_abs_mean": 0.23088564723730087, "train_probe_signal/advantage_pre_scale_std": 0.2906094044446945, "train_probe_signal/advantage_std": 0.2906094044446945, "train_probe_signal/brier_reward/centered_abs_mean": 0.14325847725073496, "train_probe_signal/brier_reward/group_std_mean": 0.19320360322793326, "train_probe_signal/brier_reward/group_zero_std_frac": 0.0, "train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.07162923862536748, "train_probe_signal/brier_reward/weight": 0.5, "train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.07162923862536748, "train_probe_signal/confidence_one_or_zero/centered_abs_mean": 0.0, "train_probe_signal/confidence_one_or_zero/group_std_mean": 0.0, "train_probe_signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "train_probe_signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "train_probe_signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "train_probe_signal/format_reward/centered_abs_mean": 0.013346354011446238, "train_probe_signal/format_reward/group_std_mean": 0.0362943010404706, "train_probe_signal/format_reward/group_zero_std_frac": 0.8055555820465088, "train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.006673177005723119, "train_probe_signal/format_reward/weight": 0.5, "train_probe_signal/format_reward/weighted_centered_abs_mean": 0.006673177005723119, "train_probe_signal/mean_confidence_reward/centered_abs_mean": 0.19063582768042883, "train_probe_signal/mean_confidence_reward/group_std_mean": 0.2232448955376943, "train_probe_signal/mean_confidence_reward/group_zero_std_frac": 0.0, "train_probe_signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 1.9063582499256881e-06, "train_probe_signal/mean_confidence_reward/weight": 9.999999747378752e-06, "train_probe_signal/mean_confidence_reward/weighted_centered_abs_mean": 1.9063582499256881e-06, "train_probe_steps_per_second": 0.03 }, { "calibration/aurc": 0.08404616910375391, "calibration/batch_distribution_entropy": 0.5988151606281764, "calibration/batch_entropy_100bins": 0.3335366281759386, "calibration/batch_entropy_10bins": 0.5988151606281764, "calibration/batch_entropy_50bins": 0.39263392211128456, "calibration/batch_uniqueness": 0.3433787299809475, "calibration/confidence_entropy": 0.466011845904414, "calibration/coverage@0%": 0.1418848167539267, "calibration/coverage@1%": 0.36977537346023875, "calibration/coverage@10%": 0.5878690372809116, "calibration/coverage@15%": 0.803157923515746, "calibration/coverage@20%": 0.9260925874896666, "calibration/coverage@25%": 0.9564590796362633, "calibration/coverage@30%": 0.9879581151832462, "calibration/coverage@5%": 0.40542044899406937, "calibration/distribution_entropy_10": 0.5988151606281764, "calibration/distribution_entropy_100": 0.3335366281759386, "calibration/ece": 0.11570963956581257, "calibration/mean_confidence": 0.7057089920404749, "calibration/unique_confidence_per_question": 0.021354166666666664, "calibration/unique_confidences": 8.2, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00425347222222221, "completions/max_length": 3435.0, "completions/max_terminated_length": 3435.0, "completions/mean_length": 980.7009643554687, "completions/mean_terminated_length": 984.8880493164063, "completions/min_length": 0.0, "completions/min_terminated_length": 281.6, "epoch": 1.6947115384615383, "grad_norm": 0.00038545418647117913, "learning_rate": 2.013221153846154e-06, "loss": -0.0039, "num_tokens": 1854408702.0, "reward": 1.307462787628174, "reward_std": 0.09673023074865342, "rewards/accuracy_reward": 0.7457465171813965, "rewards/brier_reward": 0.8734184384346009, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9957465410232544, "rewards/mean_confidence_reward": 0.7106597065925598, "sampling/batch_mean_priority_error": 0.032918609107163224, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.20555555555555555, "sampling/error_ema_max": 0.23887455463409424, "sampling/error_ema_mean": 0.010429654642939568, "sampling/priority_kl": 0.029999804869294166, "sampling/priority_scale": 0.7776785313850268, "sampling/prob_entropy": 10.278947067260741, "sampling/prob_max": 5.5668204731773584e-05, "sampling/prob_min": 2.038492457359098e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.6871999979019165, "sampling/prompt_draws_total": 50616.0, "sampling/seen_fraction": 0.8948533415794373, "sampling/unseen_fraction": 0.10514665842056274, "signal/accuracy_reward/centered_abs_mean": 0.10552842915058136, "signal/accuracy_reward/group_std_mean": 0.14385080635547637, "signal/accuracy_reward/group_zero_std_frac": 0.5777777969837189, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05276421457529068, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05276421457529068, "signal/advantage_abs_mean": 0.06741584911942482, "signal/advantage_pre_scale_abs_mean": 0.06741584911942482, "signal/advantage_pre_scale_std": 0.1482072502374649, "signal/advantage_std": 0.1482072502374649, "signal/brier_reward/centered_abs_mean": 0.05303824469447136, "signal/brier_reward/group_std_mean": 0.07423444986343383, "signal/brier_reward/group_zero_std_frac": 0.3277777791023254, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02651912234723568, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.02651912234723568, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.007796223927289248, "signal/format_reward/group_std_mean": 0.016708648204803465, "signal/format_reward/group_zero_std_frac": 0.925000011920929, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.003898111963644624, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.003898111963644624, "signal/mean_confidence_reward/centered_abs_mean": 0.04716092497110367, "signal/mean_confidence_reward/group_std_mean": 0.06453502401709557, "signal/mean_confidence_reward/group_zero_std_frac": 0.34166666865348816, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 4.716092234957614e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 4.716092234957614e-07, "step": 705 }, { "calibration/aurc": 0.056411707079725336, "calibration/batch_distribution_entropy": 0.5792413277835646, "calibration/batch_entropy_100bins": 0.31917351489767043, "calibration/batch_entropy_10bins": 0.5792413277835646, "calibration/batch_entropy_50bins": 0.3757258975533331, "calibration/batch_uniqueness": 0.2996425228471575, "calibration/confidence_entropy": 0.4706598405371064, "calibration/coverage@0%": 0.26875, "calibration/coverage@1%": 0.440625, "calibration/coverage@10%": 0.8707297120418849, "calibration/coverage@15%": 0.8801047120418849, "calibration/coverage@20%": 0.9183246073298429, "calibration/coverage@25%": 0.9183246073298429, "calibration/coverage@30%": 0.9183246073298429, "calibration/coverage@5%": 0.7516458333333333, "calibration/distribution_entropy_10": 0.5792413277835646, "calibration/distribution_entropy_100": 0.31917351489767043, "calibration/ece": 0.14625667539266998, "calibration/mean_confidence": 0.7274407286212916, "calibration/unique_confidence_per_question": 0.021354166666666667, "calibration/unique_confidences": 8.2, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.004253472222222232, "completions/max_length": 3389.8, "completions/max_terminated_length": 3389.8, "completions/mean_length": 966.7477416992188, "completions/mean_terminated_length": 970.8892822265625, "completions/min_length": 0.0, "completions/min_terminated_length": 237.2, "epoch": 1.7067307692307692, "grad_norm": 0.0004834532446693629, "learning_rate": 1.983173076923077e-06, "loss": -0.0035, "num_tokens": 1868619428.0, "reward": 1.3241670846939086, "reward_std": 0.0868344023823738, "rewards/accuracy_reward": 0.7802951455116272, "rewards/brier_reward": 0.87227863073349, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9957465171813965, "rewards/mean_confidence_reward": 0.6925434112548828, "sampling/batch_mean_priority_error": 0.030951319032566225, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.17777777777777778, "sampling/error_ema_max": 0.23887455463409424, "sampling/error_ema_mean": 0.010457353666424751, "sampling/priority_kl": 0.030000027641654013, "sampling/priority_scale": 0.7802383721107617, "sampling/prob_entropy": 10.278952598571777, "sampling/prob_max": 5.590729124378413e-05, "sampling/prob_min": 2.0404893803060986e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.6991999864578247, "sampling/prompt_draws_total": 50976.0, "sampling/seen_fraction": 0.8971199989318848, "sampling/unseen_fraction": 0.10288000106811523, "signal/accuracy_reward/centered_abs_mean": 0.09852973222732545, "signal/accuracy_reward/group_std_mean": 0.13320806324481965, "signal/accuracy_reward/group_zero_std_frac": 0.6055555582046509, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04926486611366272, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04926486611366272, "signal/advantage_abs_mean": 0.06283641681075096, "signal/advantage_pre_scale_abs_mean": 0.06283641681075096, "signal/advantage_pre_scale_std": 0.13592168539762498, "signal/advantage_std": 0.13592168539762498, "signal/brier_reward/centered_abs_mean": 0.05235719159245491, "signal/brier_reward/group_std_mean": 0.0704245388507843, "signal/brier_reward/group_zero_std_frac": 0.26388888955116274, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.026178595796227454, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.026178595796227454, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.007145182462409139, "signal/format_reward/group_std_mean": 0.012697589956223965, "signal/format_reward/group_zero_std_frac": 0.95, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0035725912312045693, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0035725912312045693, "signal/mean_confidence_reward/centered_abs_mean": 0.05213705077767372, "signal/mean_confidence_reward/group_std_mean": 0.06859569475054741, "signal/mean_confidence_reward/group_zero_std_frac": 0.26666666865348815, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.2137048669465e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.2137048669465e-07, "step": 710 }, { "calibration/aurc": 0.10144029251164566, "calibration/batch_distribution_entropy": 0.5868865138582311, "calibration/batch_entropy_100bins": 0.3227534884668084, "calibration/batch_entropy_10bins": 0.5868865138582311, "calibration/batch_entropy_50bins": 0.3799401845154352, "calibration/batch_uniqueness": 0.3160996147466604, "calibration/confidence_entropy": 0.4645166674578388, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.13125, "calibration/coverage@10%": 0.5943580933682373, "calibration/coverage@15%": 0.6554482984293194, "calibration/coverage@20%": 0.7467822862129144, "calibration/coverage@25%": 0.9282722513089006, "calibration/coverage@30%": 0.9429319371727749, "calibration/coverage@5%": 0.5207624345549738, "calibration/distribution_entropy_10": 0.5868865138582311, "calibration/distribution_entropy_100": 0.3227534884668084, "calibration/ece": 0.14183191535776607, "calibration/mean_confidence": 0.7273729275741712, "calibration/unique_confidence_per_question": 0.020833333333333332, "calibration/unique_confidences": 8.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.003125, "completions/max_length": 3701.4, "completions/max_terminated_length": 3701.4, "completions/mean_length": 996.6374145507813, "completions/mean_terminated_length": 999.7630126953125, "completions/min_length": 0.0, "completions/min_terminated_length": 315.6, "epoch": 1.71875, "grad_norm": 0.0005059548420831561, "learning_rate": 1.953125e-06, "loss": -0.0017, "num_tokens": 1883175795.0, "reward": 1.3099539518356322, "reward_std": 0.09733439832925797, "rewards/accuracy_reward": 0.7560763835906983, "rewards/brier_reward": 0.8669427156448364, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.996875011920929, "rewards/mean_confidence_reward": 0.6936632037162781, "sampling/batch_mean_priority_error": 0.03457258739857514, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.17222222222222222, "sampling/error_ema_max": 0.23887455463409424, "sampling/error_ema_mean": 0.01048535518348217, "sampling/priority_kl": 0.030000003799796105, "sampling/priority_scale": 0.7830242693657056, "sampling/prob_entropy": 10.278955459594727, "sampling/prob_max": 5.615855916403234e-05, "sampling/prob_min": 2.0421350927790628e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.7111999988555908, "sampling/prompt_draws_total": 51336.0, "sampling/seen_fraction": 0.8994266510009765, "sampling/unseen_fraction": 0.10057334899902344, "signal/accuracy_reward/centered_abs_mean": 0.10865885317325592, "signal/accuracy_reward/group_std_mean": 0.14986452460289001, "signal/accuracy_reward/group_zero_std_frac": 0.550000011920929, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05432942658662796, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05432942658662796, "signal/advantage_abs_mean": 0.06795400306582451, "signal/advantage_pre_scale_abs_mean": 0.06795400306582451, "signal/advantage_pre_scale_std": 0.14416785538196564, "signal/advantage_std": 0.14416785538196564, "signal/brier_reward/centered_abs_mean": 0.05352667644619942, "signal/brier_reward/group_std_mean": 0.07452422827482223, "signal/brier_reward/group_zero_std_frac": 0.2277777761220932, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02676333822309971, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.02676333822309971, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.005642361147329212, "signal/format_reward/group_std_mean": 0.011255127005279064, "signal/format_reward/group_zero_std_frac": 0.9527777791023254, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.002821180573664606, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.002821180573664606, "signal/mean_confidence_reward/centered_abs_mean": 0.04913303405046463, "signal/mean_confidence_reward/group_std_mean": 0.06512986496090889, "signal/mean_confidence_reward/group_zero_std_frac": 0.23888888955116272, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 4.913303087050735e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 4.913303087050735e-07, "step": 715 }, { "calibration/aurc": 0.11201574671393137, "calibration/batch_distribution_entropy": 0.6315005836708136, "calibration/batch_entropy_100bins": 0.35334694917426485, "calibration/batch_entropy_10bins": 0.6315005836708136, "calibration/batch_entropy_50bins": 0.41595431146220596, "calibration/batch_uniqueness": 0.4524251302083334, "calibration/confidence_entropy": 0.4942181505118133, "calibration/coverage@0%": 0.0859375, "calibration/coverage@1%": 0.196875, "calibration/coverage@10%": 0.47916666666666663, "calibration/coverage@15%": 0.71875, "calibration/coverage@20%": 0.7713541666666667, "calibration/coverage@25%": 0.8442708333333334, "calibration/coverage@30%": 0.9651041666666668, "calibration/coverage@5%": 0.33437500000000003, "calibration/distribution_entropy_10": 0.6315005836708136, "calibration/distribution_entropy_100": 0.35334694917426485, "calibration/ece": 0.11661458333333326, "calibration/mean_confidence": 0.6916145833333335, "calibration/unique_confidence_per_question": 0.020833333333333332, "calibration/unique_confidences": 8.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.002951388888888862, "completions/max_length": 3609.6, "completions/max_terminated_length": 3609.6, "completions/mean_length": 992.2913208007812, "completions/mean_terminated_length": 995.2805786132812, "completions/min_length": 0.0, "completions/min_terminated_length": 267.0, "epoch": 1.7307692307692308, "grad_norm": 0.0004260011774022132, "learning_rate": 1.9230769230769234e-06, "loss": -0.0018, "num_tokens": 1897715887.0, "reward": 1.3017566204071045, "reward_std": 0.095692677795887, "rewards/accuracy_reward": 0.7407986044883728, "rewards/brier_reward": 0.8656519293785095, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9970486044883728, "rewards/mean_confidence_reward": 0.705946171283722, "sampling/batch_mean_priority_error": 0.03343948360511164, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.16666666666666669, "sampling/error_ema_max": 0.23887455463409424, "sampling/error_ema_mean": 0.010514728166162968, "sampling/priority_kl": 0.029999824613332747, "sampling/priority_scale": 0.7848560034064576, "sampling/prob_entropy": 10.27894992828369, "sampling/prob_max": 5.6361448514508083e-05, "sampling/prob_min": 2.0444238180061804e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.723200011253357, "sampling/prompt_draws_total": 51696.0, "sampling/seen_fraction": 0.9012800097465515, "sampling/unseen_fraction": 0.09871999025344849, "signal/accuracy_reward/centered_abs_mean": 0.11094835102558136, "signal/accuracy_reward/group_std_mean": 0.14612414538860322, "signal/accuracy_reward/group_zero_std_frac": 0.5833333373069763, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05547417551279068, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05547417551279068, "signal/advantage_abs_mean": 0.06998142004013061, "signal/advantage_pre_scale_abs_mean": 0.06998142004013061, "signal/advantage_pre_scale_std": 0.1458520144224167, "signal/advantage_std": 0.1458520144224167, "signal/brier_reward/centered_abs_mean": 0.05215005427598953, "signal/brier_reward/group_std_mean": 0.07294831350445748, "signal/brier_reward/group_zero_std_frac": 0.272222226858139, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.026075027137994766, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.026075027137994766, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.004926215298473835, "signal/format_reward/group_std_mean": 0.01056935265660286, "signal/format_reward/group_zero_std_frac": 0.95, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0024631076492369177, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0024631076492369177, "signal/mean_confidence_reward/centered_abs_mean": 0.04923503249883652, "signal/mean_confidence_reward/group_std_mean": 0.06458318158984185, "signal/mean_confidence_reward/group_zero_std_frac": 0.2805555611848831, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 4.923503070131118e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 4.923503070131118e-07, "step": 720 }, { "calibration/aurc": 0.06508023949155624, "calibration/batch_distribution_entropy": 0.5558616334714455, "calibration/batch_entropy_100bins": 0.3077710333755689, "calibration/batch_entropy_10bins": 0.5558616334714455, "calibration/batch_entropy_50bins": 0.3623030807961205, "calibration/batch_uniqueness": 0.27050792753929254, "calibration/confidence_entropy": 0.4731002437303237, "calibration/coverage@0%": 0.2578125, "calibration/coverage@1%": 0.2838541666666667, "calibration/coverage@10%": 0.6793594160104987, "calibration/coverage@15%": 0.8896448490813649, "calibration/coverage@20%": 0.9484375, "calibration/coverage@25%": 0.9901041666666666, "calibration/coverage@30%": 1.0, "calibration/coverage@5%": 0.5234375, "calibration/distribution_entropy_10": 0.5558616334714455, "calibration/distribution_entropy_100": 0.3077710333755689, "calibration/ece": 0.1668618766404199, "calibration/mean_confidence": 0.7121259842519685, "calibration/unique_confidence_per_question": 0.020833333333333332, "calibration/unique_confidences": 8.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.002256944444444442, "completions/max_length": 3377.8, "completions/max_terminated_length": 3377.8, "completions/mean_length": 1015.0033813476563, "completions/mean_terminated_length": 1017.2540283203125, "completions/min_length": 0.0, "completions/min_terminated_length": 301.0, "epoch": 1.7427884615384617, "grad_norm": 0.00045864039566367865, "learning_rate": 1.8930288461538463e-06, "loss": -0.0012, "num_tokens": 1912523734.0, "reward": 1.330811357498169, "reward_std": 0.08866008371114731, "rewards/accuracy_reward": 0.7939236044883728, "rewards/brier_reward": 0.8700286626815796, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.99765625, "rewards/mean_confidence_reward": 0.7115191102027894, "sampling/batch_mean_priority_error": 0.03423864334196256, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.15555555555555553, "sampling/error_ema_max": 0.23887455463409424, "sampling/error_ema_mean": 0.01054015289992094, "sampling/priority_kl": 0.030000202357769012, "sampling/priority_scale": 0.787105053733103, "sampling/prob_entropy": 10.278961372375488, "sampling/prob_max": 5.6586982100270686e-05, "sampling/prob_min": 2.0466489513637497e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.735199999809265, "sampling/prompt_draws_total": 52056.0, "sampling/seen_fraction": 0.9032733321189881, "sampling/unseen_fraction": 0.09672666788101196, "signal/accuracy_reward/centered_abs_mean": 0.09814453125, "signal/accuracy_reward/group_std_mean": 0.13914075791835784, "signal/accuracy_reward/group_zero_std_frac": 0.5666666746139526, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.049072265625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.049072265625, "signal/advantage_abs_mean": 0.06016180217266083, "signal/advantage_pre_scale_abs_mean": 0.06016180217266083, "signal/advantage_pre_scale_std": 0.13105083703994752, "signal/advantage_std": 0.13105083703994752, "signal/brier_reward/centered_abs_mean": 0.05176843777298927, "signal/brier_reward/group_std_mean": 0.0720445230603218, "signal/brier_reward/group_zero_std_frac": 0.2833333373069763, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.025884218886494635, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.025884218886494635, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.004258897516410798, "signal/format_reward/group_std_mean": 0.009134303964674472, "signal/format_reward/group_zero_std_frac": 0.9583333253860473, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.002129448758205399, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.002129448758205399, "signal/mean_confidence_reward/centered_abs_mean": 0.04849230349063873, "signal/mean_confidence_reward/group_std_mean": 0.06421271786093712, "signal/mean_confidence_reward/group_zero_std_frac": 0.2916666716337204, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 4.849230208492372e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 4.849230208492372e-07, "step": 725 }, { "calibration/aurc": 0.13226417419465478, "calibration/batch_distribution_entropy": 0.556962817758184, "calibration/batch_entropy_100bins": 0.3047709072838795, "calibration/batch_entropy_10bins": 0.556962817758184, "calibration/batch_entropy_50bins": 0.35877138090261723, "calibration/batch_uniqueness": 0.2528834304460806, "calibration/confidence_entropy": 0.46571681160407047, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.6160899461925753, "calibration/coverage@15%": 0.7093813230942497, "calibration/coverage@20%": 0.7668666416350868, "calibration/coverage@25%": 0.8565204872604044, "calibration/coverage@30%": 0.9248680918189731, "calibration/coverage@5%": 0.19345354656222802, "calibration/distribution_entropy_10": 0.556962817758184, "calibration/distribution_entropy_100": 0.3047709072838795, "calibration/ece": 0.10036003188724493, "calibration/mean_confidence": 0.7258346391964574, "calibration/unique_confidence_per_question": 0.020312499999999997, "calibration/unique_confidences": 7.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.001996527777777768, "completions/max_length": 3585.6, "completions/max_terminated_length": 3585.6, "completions/mean_length": 1024.8122680664062, "completions/mean_terminated_length": 1026.8658447265625, "completions/min_length": 0.0, "completions/min_terminated_length": 247.4, "epoch": 1.7548076923076923, "grad_norm": 0.000421989883761853, "learning_rate": 1.8629807692307695e-06, "loss": -0.0009, "num_tokens": 1927461891.0, "reward": 1.310717487335205, "reward_std": 0.10098863393068314, "rewards/accuracy_reward": 0.7563368201255798, "rewards/brier_reward": 0.8670798420906067, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9980034708976746, "rewards/mean_confidence_reward": 0.7394097208976745, "sampling/batch_mean_priority_error": 0.03910652136909175, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.17222222222222222, "sampling/error_ema_max": 0.23887455463409424, "sampling/error_ema_mean": 0.010574315674602985, "sampling/priority_kl": 0.02999994680285454, "sampling/priority_scale": 0.7895619689719752, "sampling/prob_entropy": 10.278949356079101, "sampling/prob_max": 5.68221221328713e-05, "sampling/prob_min": 2.0486395442276262e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.7472000122070312, "sampling/prompt_draws_total": 52416.0, "sampling/seen_fraction": 0.9052866697311401, "sampling/unseen_fraction": 0.09471333026885986, "signal/accuracy_reward/centered_abs_mean": 0.10135091096162796, "signal/accuracy_reward/group_std_mean": 0.1404233992099762, "signal/accuracy_reward/group_zero_std_frac": 0.575000011920929, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05067545548081398, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05067545548081398, "signal/advantage_abs_mean": 0.07134843468666077, "signal/advantage_pre_scale_abs_mean": 0.07134843468666077, "signal/advantage_pre_scale_std": 0.15252363979816436, "signal/advantage_std": 0.15252363979816436, "signal/brier_reward/centered_abs_mean": 0.05917214304208755, "signal/brier_reward/group_std_mean": 0.08254666775465011, "signal/brier_reward/group_zero_std_frac": 0.2888888895511627, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.029586071521043776, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.029586071521043776, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.003759765566792339, "signal/format_reward/group_std_mean": 0.009313787892460823, "signal/format_reward/group_zero_std_frac": 0.9527777791023254, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0018798827833961696, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0018798827833961696, "signal/mean_confidence_reward/centered_abs_mean": 0.04908203333616257, "signal/mean_confidence_reward/group_std_mean": 0.06572242081165314, "signal/mean_confidence_reward/group_zero_std_frac": 0.3083333343267441, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 4.908203493414476e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 4.908203493414476e-07, "step": 730 }, { "calibration/aurc": 0.16262073369685778, "calibration/batch_distribution_entropy": 0.38419717906836637, "calibration/batch_entropy_100bins": 0.20703974933536848, "calibration/batch_entropy_10bins": 0.38419717906836637, "calibration/batch_entropy_50bins": 0.2437238430425174, "calibration/batch_uniqueness": -0.13750190241657567, "calibration/confidence_entropy": 0.4000716698967516, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.15104166666666669, "calibration/coverage@10%": 0.3713541666666667, "calibration/coverage@15%": 0.5270833333333333, "calibration/coverage@20%": 0.5270833333333333, "calibration/coverage@25%": 0.7703125, "calibration/coverage@30%": 0.9191103677110531, "calibration/coverage@5%": 0.3458333333333333, "calibration/distribution_entropy_10": 0.38419717906836637, "calibration/distribution_entropy_100": 0.20703974933536848, "calibration/ece": 0.13378821257615323, "calibration/mean_confidence": 0.8116920963881638, "calibration/unique_confidence_per_question": 0.017708333333333333, "calibration/unique_confidences": 6.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0021701388888888838, "completions/max_length": 3682.4, "completions/max_terminated_length": 3682.4, "completions/mean_length": 980.3084350585938, "completions/mean_terminated_length": 982.4520629882812, "completions/min_length": 0.0, "completions/min_terminated_length": 243.6, "epoch": 1.7668269230769231, "grad_norm": 0.0006200639181770384, "learning_rate": 1.8329326923076924e-06, "loss": -0.0005, "num_tokens": 1941838276.0, "reward": 1.3009915828704834, "reward_std": 0.10750979483127594, "rewards/accuracy_reward": 0.7427083373069763, "rewards/brier_reward": 0.8618637204170227, "rewards/confidence_one_or_zero": 8.680555620230735e-05, "rewards/format_reward": 0.9973958253860473, "rewards/mean_confidence_reward": 0.769019079208374, "sampling/batch_mean_priority_error": 0.05118026096658691, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.1388888888888889, "sampling/error_ema_max": 0.23887455463409424, "sampling/error_ema_mean": 0.010620946437120438, "sampling/priority_kl": 0.030000010132789613, "sampling/priority_scale": 0.791469556116499, "sampling/prob_entropy": 10.278951644897461, "sampling/prob_max": 5.703187925973907e-05, "sampling/prob_min": 2.051140763796866e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.7592000246047974, "sampling/prompt_draws_total": 52776.0, "sampling/seen_fraction": 0.9070466756820679, "sampling/unseen_fraction": 0.09295332431793213, "signal/accuracy_reward/centered_abs_mean": 0.11150173842906952, "signal/accuracy_reward/group_std_mean": 0.14786103665828704, "signal/accuracy_reward/group_zero_std_frac": 0.5722222328186035, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05575086921453476, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05575086921453476, "signal/advantage_abs_mean": 0.07811176404356956, "signal/advantage_pre_scale_abs_mean": 0.07811176404356956, "signal/advantage_pre_scale_std": 0.1644335836172104, "signal/advantage_std": 0.1644335836172104, "signal/brier_reward/centered_abs_mean": 0.062073732167482375, "signal/brier_reward/group_std_mean": 0.08578675836324692, "signal/brier_reward/group_zero_std_frac": 0.3694444537162781, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.031036866083741187, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.031036866083741187, "signal/confidence_one_or_zero/centered_abs_mean": 0.0001681857625953853, "signal/confidence_one_or_zero/group_std_mean": 0.0004910463467240334, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9972222208976745, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.6818574977151001e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.6818574977151001e-09, "signal/format_reward/centered_abs_mean": 0.0048936631763353946, "signal/format_reward/group_std_mean": 0.011555349640548229, "signal/format_reward/group_zero_std_frac": 0.944444453716278, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0024468315881676973, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0024468315881676973, "signal/mean_confidence_reward/centered_abs_mean": 0.047903656214475634, "signal/mean_confidence_reward/group_std_mean": 0.06507696658372879, "signal/mean_confidence_reward/group_zero_std_frac": 0.40277777910232543, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 4.790365494500292e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 4.790365494500292e-07, "step": 735 }, { "calibration/aurc": 0.0984268474633593, "calibration/batch_distribution_entropy": 0.37509171035558264, "calibration/batch_entropy_100bins": 0.20162115419693408, "calibration/batch_entropy_10bins": 0.37509171035558264, "calibration/batch_entropy_50bins": 0.23734516051768728, "calibration/batch_uniqueness": -0.18610338479294536, "calibration/confidence_entropy": 0.40682152374552005, "calibration/coverage@0%": 0.18385416666666668, "calibration/coverage@1%": 0.2, "calibration/coverage@10%": 0.5096891318537858, "calibration/coverage@15%": 0.8533126631853787, "calibration/coverage@20%": 0.900262456483899, "calibration/coverage@25%": 0.9075718015665796, "calibration/coverage@30%": 0.9556135770234988, "calibration/coverage@5%": 0.47422758920800695, "calibration/distribution_entropy_10": 0.37509171035558264, "calibration/distribution_entropy_100": 0.20162115419693408, "calibration/ece": 0.11171507832898168, "calibration/mean_confidence": 0.7978674390774588, "calibration/unique_confidence_per_question": 0.019791666666666666, "calibration/unique_confidences": 7.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00234375, "completions/max_length": 3268.0, "completions/max_terminated_length": 3268.0, "completions/mean_length": 974.5008911132812, "completions/mean_terminated_length": 976.8037719726562, "completions/min_length": 0.0, "completions/min_terminated_length": 246.2, "epoch": 1.7788461538461537, "grad_norm": 0.0006327030714601278, "learning_rate": 1.8028846153846156e-06, "loss": -0.0003, "num_tokens": 1956136878.0, "reward": 1.286976194381714, "reward_std": 0.11908978521823883, "rewards/accuracy_reward": 0.7282986164093017, "rewards/brier_reward": 0.8480685830116272, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.997569453716278, "rewards/mean_confidence_reward": 0.7898784637451172, "sampling/batch_mean_priority_error": 0.061700169462697484, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.16111111111111112, "sampling/error_ema_max": 0.23887455463409424, "sampling/error_ema_mean": 0.010678479447960854, "sampling/priority_kl": 0.03000004291534424, "sampling/priority_scale": 0.7937021076912061, "sampling/prob_entropy": 10.278962516784668, "sampling/prob_max": 5.7258721790276466e-05, "sampling/prob_min": 2.0533668430289253e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.7711999893188477, "sampling/prompt_draws_total": 53136.0, "sampling/seen_fraction": 0.908893346786499, "sampling/unseen_fraction": 0.09110665321350098, "signal/accuracy_reward/centered_abs_mean": 0.10706380307674408, "signal/accuracy_reward/group_std_mean": 0.1492036297917366, "signal/accuracy_reward/group_zero_std_frac": 0.5527777910232544, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05353190153837204, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05353190153837204, "signal/advantage_abs_mean": 0.0839640349149704, "signal/advantage_pre_scale_abs_mean": 0.0839640349149704, "signal/advantage_pre_scale_std": 0.1742776095867157, "signal/advantage_std": 0.1742776095867157, "signal/brier_reward/centered_abs_mean": 0.07154948264360428, "signal/brier_reward/group_std_mean": 0.1000126764178276, "signal/brier_reward/group_zero_std_frac": 0.38611111640930174, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03577474132180214, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03577474132180214, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.004459635436069221, "signal/format_reward/group_std_mean": 0.0099914675578475, "signal/format_reward/group_zero_std_frac": 0.9527777910232544, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0022298177180346103, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0022298177180346103, "signal/mean_confidence_reward/centered_abs_mean": 0.04505589380860329, "signal/mean_confidence_reward/group_std_mean": 0.05993512943387032, "signal/mean_confidence_reward/group_zero_std_frac": 0.43611112236976624, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 4.505589231484919e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 4.505589231484919e-07, "step": 740 }, { "calibration/aurc": 0.05148233310008995, "calibration/batch_distribution_entropy": 0.3488501157020723, "calibration/batch_entropy_100bins": 0.1841267741588303, "calibration/batch_entropy_10bins": 0.3488501157020723, "calibration/batch_entropy_50bins": 0.2167510593935289, "calibration/batch_uniqueness": -0.2663174130925411, "calibration/confidence_entropy": 0.3887985954627028, "calibration/coverage@0%": 0.1712041884816754, "calibration/coverage@1%": 0.1806282722513089, "calibration/coverage@10%": 0.8939155664687857, "calibration/coverage@15%": 0.9435818569553807, "calibration/coverage@20%": 0.9755208333333334, "calibration/coverage@25%": 1.0, "calibration/coverage@30%": 1.0, "calibration/coverage@5%": 0.6470718503937007, "calibration/distribution_entropy_10": 0.3488501157020723, "calibration/distribution_entropy_100": 0.1841267741588303, "calibration/ece": 0.08471214065355696, "calibration/mean_confidence": 0.8317367237635873, "calibration/unique_confidence_per_question": 0.01927083333333333, "calibration/unique_confidences": 7.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0035590277777777903, "completions/max_length": 3580.2, "completions/max_terminated_length": 3580.2, "completions/mean_length": 981.0261352539062, "completions/mean_terminated_length": 984.6888549804687, "completions/min_length": 0.0, "completions/min_terminated_length": 272.6, "epoch": 1.7908653846153846, "grad_norm": 0.0005542809376493096, "learning_rate": 1.7728365384615387e-06, "loss": -0.0019, "num_tokens": 1970542395.0, "reward": 1.3384529113769532, "reward_std": 0.09226970225572587, "rewards/accuracy_reward": 0.7962673544883728, "rewards/brier_reward": 0.8842682242393494, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9963541746139526, "rewards/mean_confidence_reward": 0.8027343511581421, "sampling/batch_mean_priority_error": 0.0450249363335027, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.175, "sampling/error_ema_max": 0.23887455463409424, "sampling/error_ema_mean": 0.01073062587529421, "sampling/priority_kl": 0.030000034347176553, "sampling/priority_scale": 0.7963388859992847, "sampling/prob_entropy": 10.278950309753418, "sampling/prob_max": 5.750622949562967e-05, "sampling/prob_min": 2.0552280693664217e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.7831999778747558, "sampling/prompt_draws_total": 53496.0, "sampling/seen_fraction": 0.9108466625213623, "sampling/unseen_fraction": 0.0891533374786377, "signal/accuracy_reward/centered_abs_mean": 0.08584526777267457, "signal/accuracy_reward/group_std_mean": 0.11938865035772324, "signal/accuracy_reward/group_zero_std_frac": 0.6388889074325561, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04292263388633728, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04292263388633728, "signal/advantage_abs_mean": 0.06494638547301293, "signal/advantage_pre_scale_abs_mean": 0.06494638547301293, "signal/advantage_pre_scale_std": 0.15016201436519622, "signal/advantage_std": 0.15016201436519622, "signal/brier_reward/centered_abs_mean": 0.05143366307020188, "signal/brier_reward/group_std_mean": 0.07305691987276078, "signal/brier_reward/group_zero_std_frac": 0.4916666805744171, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02571683153510094, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.02571683153510094, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.006011284794658423, "signal/format_reward/group_std_mean": 0.010725349932909013, "signal/format_reward/group_zero_std_frac": 0.9583333253860473, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0030056423973292114, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0030056423973292114, "signal/mean_confidence_reward/centered_abs_mean": 0.04237794876098633, "signal/mean_confidence_reward/group_std_mean": 0.05708230137825012, "signal/mean_confidence_reward/group_zero_std_frac": 0.5194444537162781, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 4.2377944851068607e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 4.2377944851068607e-07, "step": 745 }, { "calibration/aurc": 0.12513098641408765, "calibration/batch_distribution_entropy": 0.4386680736185493, "calibration/batch_entropy_100bins": 0.23583337034985075, "calibration/batch_entropy_10bins": 0.4386680736185493, "calibration/batch_entropy_50bins": 0.2776192278238811, "calibration/batch_uniqueness": -0.0454765110770183, "calibration/confidence_entropy": 0.42076816317473975, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.5476555700609226, "calibration/coverage@15%": 0.7570740861618799, "calibration/coverage@20%": 0.8138571583986074, "calibration/coverage@25%": 0.8925655461270671, "calibration/coverage@30%": 0.8925655461270671, "calibration/coverage@5%": 0.19166666666666668, "calibration/distribution_entropy_10": 0.4386680736185493, "calibration/distribution_entropy_100": 0.23583337034985075, "calibration/ece": 0.09808841927763264, "calibration/mean_confidence": 0.7692097748041777, "calibration/unique_confidence_per_question": 0.020833333333333332, "calibration/unique_confidences": 8.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0019097222222222321, "completions/max_length": 3707.2, "completions/max_terminated_length": 3707.2, "completions/mean_length": 1036.6534790039063, "completions/mean_terminated_length": 1038.6652465820312, "completions/min_length": 0.0, "completions/min_terminated_length": 335.2, "epoch": 1.8028846153846154, "grad_norm": 0.0005948901525698602, "learning_rate": 1.7427884615384616e-06, "loss": -0.001, "num_tokens": 1985591043.0, "reward": 1.3137980937957763, "reward_std": 0.10690182149410248, "rewards/accuracy_reward": 0.753125, "rewards/brier_reward": 0.8765390992164612, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9979166626930237, "rewards/mean_confidence_reward": 0.7790190696716308, "sampling/batch_mean_priority_error": 0.04252574058302948, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.17777777777777776, "sampling/error_ema_max": 0.23887455463409424, "sampling/error_ema_mean": 0.010769122280180454, "sampling/priority_kl": 0.02999994307756424, "sampling/priority_scale": 0.7995206534629687, "sampling/prob_entropy": 10.278951263427734, "sampling/prob_max": 5.777966289315373e-05, "sampling/prob_min": 2.0565289742080494e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.795199990272522, "sampling/prompt_draws_total": 53856.0, "sampling/seen_fraction": 0.9129800081253052, "sampling/unseen_fraction": 0.08701999187469482, "signal/accuracy_reward/centered_abs_mean": 0.10002170205116272, "signal/accuracy_reward/group_std_mean": 0.13846919387578965, "signal/accuracy_reward/group_zero_std_frac": 0.5861111283302307, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05001085102558136, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05001085102558136, "signal/advantage_abs_mean": 0.07599502354860306, "signal/advantage_pre_scale_abs_mean": 0.07599502354860306, "signal/advantage_pre_scale_std": 0.16599199771881104, "signal/advantage_std": 0.16599199771881104, "signal/brier_reward/centered_abs_mean": 0.06395492181181908, "signal/brier_reward/group_std_mean": 0.08838129937648773, "signal/brier_reward/group_zero_std_frac": 0.4138888955116272, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03197746090590954, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03197746090590954, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.003971354104578495, "signal/format_reward/group_std_mean": 0.010237774625420571, "signal/format_reward/group_zero_std_frac": 0.9472222328186035, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0019856770522892475, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0019856770522892475, "signal/mean_confidence_reward/centered_abs_mean": 0.045269110798835756, "signal/mean_confidence_reward/group_std_mean": 0.06089982837438583, "signal/mean_confidence_reward/group_zero_std_frac": 0.45000000596046447, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 4.5269109705259325e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 4.5269109705259325e-07, "step": 750 }, { "epoch": 1.8028846153846154, "eval_calibration/aurc": 0.13990344378862302, "eval_calibration/batch_distribution_entropy": 0.49701023927115834, "eval_calibration/batch_entropy_100bins": 0.2746262363397858, "eval_calibration/batch_entropy_10bins": 0.49701023927115834, "eval_calibration/batch_entropy_50bins": 0.3232855620039196, "eval_calibration/batch_uniqueness": 0.08372214988425933, "eval_calibration/confidence_entropy": 0.42793329476976205, "eval_calibration/coverage@0%": 0.0, "eval_calibration/coverage@1%": 0.0, "eval_calibration/coverage@10%": 0.0, "eval_calibration/coverage@15%": 0.7326388888888888, "eval_calibration/coverage@20%": 0.8489583333333334, "eval_calibration/coverage@25%": 0.9348958333333334, "eval_calibration/coverage@30%": 1.0, "eval_calibration/coverage@5%": 0.0, "eval_calibration/distribution_entropy_10": 0.49701023927115834, "eval_calibration/distribution_entropy_100": 0.2746262363397858, "eval_calibration/ece": 0.03541666666666681, "eval_calibration/mean_confidence": 0.763888888888889, "eval_calibration/unique_confidence_per_question": 0.006944444444444444, "eval_calibration/unique_confidences": 8, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 2543.8333333333335, "eval_completions/max_terminated_length": 2543.8333333333335, "eval_completions/mean_length": 1003.161122639974, "eval_completions/mean_terminated_length": 1003.161122639974, "eval_completions/min_length": 340.5, "eval_completions/min_terminated_length": 340.5, "eval_loss": 0.0, "eval_num_tokens": 1985591043.0, "eval_reward": 1.2930545409520466, "eval_reward_std": 0.30748553077379864, "eval_rewards/accuracy_reward": 0.7300347089767456, "eval_rewards/brier_reward": 0.8560590147972107, "eval_rewards/confidence_one_or_zero": 0.0, "eval_rewards/format_reward": 1.0, "eval_rewards/mean_confidence_reward": 0.7638888756434122, "eval_runtime": 149.0266, "eval_samples_per_second": 6.71, "eval_signal/accuracy_reward/centered_abs_mean": 0.3828667551279068, "eval_signal/accuracy_reward/group_std_mean": 0.4424740672111511, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1914333775639534, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.1914333775639534, "eval_signal/advantage_abs_mean": 0.2585411916176478, "eval_signal/advantage_pre_scale_abs_mean": 0.2585411916176478, "eval_signal/advantage_pre_scale_std": 0.3062230596939723, "eval_signal/advantage_std": 0.3062230596939723, "eval_signal/brier_reward/centered_abs_mean": 0.16724719355503717, "eval_signal/brier_reward/group_std_mean": 0.2248398462931315, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.08362359677751859, "eval_signal/brier_reward/weight": 0.5, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.08362359677751859, "eval_signal/confidence_one_or_zero/centered_abs_mean": 0.0, "eval_signal/confidence_one_or_zero/group_std_mean": 0.0, "eval_signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "eval_signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "eval_signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/mean_confidence_reward/centered_abs_mean": 0.1763671636581421, "eval_signal/mean_confidence_reward/group_std_mean": 0.21374296893676123, "eval_signal/mean_confidence_reward/group_zero_std_frac": 0.0, "eval_signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 1.7636715862560475e-06, "eval_signal/mean_confidence_reward/weight": 9.999999747378752e-06, "eval_signal/mean_confidence_reward/weighted_centered_abs_mean": 1.7636715862560475e-06, "eval_steps_per_second": 0.04, "step": 750 }, { "epoch": 1.8028846153846154, "step": 750, "train_probe_calibration/aurc": 0.09669643522423507, "train_probe_calibration/batch_distribution_entropy": 0.4769103370021153, "train_probe_calibration/batch_entropy_100bins": 0.2612872498607003, "train_probe_calibration/batch_entropy_10bins": 0.4769103370021153, "train_probe_calibration/batch_entropy_50bins": 0.30758312294373313, "train_probe_calibration/batch_uniqueness": 0.033334749723803854, "train_probe_calibration/confidence_entropy": 0.4227911882444281, "train_probe_calibration/coverage@0%": 0.0, "train_probe_calibration/coverage@1%": 0.0, "train_probe_calibration/coverage@10%": 0.7543554006968641, "train_probe_calibration/coverage@15%": 0.8658536585365854, "train_probe_calibration/coverage@20%": 0.9538327526132404, "train_probe_calibration/coverage@25%": 1.0, "train_probe_calibration/coverage@30%": 1.0, "train_probe_calibration/coverage@5%": 0.0, "train_probe_calibration/distribution_entropy_10": 0.4769103370021153, "train_probe_calibration/distribution_entropy_100": 0.2612872498607003, "train_probe_calibration/ece": 0.03911149825783945, "train_probe_calibration/mean_confidence": 0.7772648083623694, "train_probe_calibration/unique_confidence_per_question": 0.006944444444444444, "train_probe_calibration/unique_confidences": 8, "train_probe_completions/clipped_ratio": 0.005902777777777794, "train_probe_completions/max_length": 2893.0, "train_probe_completions/max_terminated_length": 2893.0, "train_probe_completions/mean_length": 1008.5927327473959, "train_probe_completions/mean_terminated_length": 1014.651611328125, "train_probe_completions/min_length": 121.33333333333333, "train_probe_completions/min_terminated_length": 291.1666666666667, "train_probe_loss": 0.0, "train_probe_num_tokens": 1985591043.0, "train_probe_reward": 1.3298992117245991, "train_probe_reward_std": 0.2911212046941121, "train_probe_rewards/accuracy_reward": 0.7821180621782938, "train_probe_rewards/brier_reward": 0.8811371823151907, "train_probe_rewards/confidence_one_or_zero": 0.0, "train_probe_rewards/format_reward": 0.996527781089147, "train_probe_rewards/mean_confidence_reward": 0.7745659252007803, "train_probe_runtime": 179.8887, "train_probe_samples_per_second": 5.559, "train_probe_signal/accuracy_reward/centered_abs_mean": 0.3321940104166667, "train_probe_signal/accuracy_reward/group_std_mean": 0.4113796005646388, "train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0, "train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.16609700520833334, "train_probe_signal/accuracy_reward/weight": 0.5, "train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.16609700520833334, "train_probe_signal/advantage_abs_mean": 0.22670255601406097, "train_probe_signal/advantage_pre_scale_abs_mean": 0.22670255601406097, "train_probe_signal/advantage_pre_scale_std": 0.28947464128335315, "train_probe_signal/advantage_std": 0.28947464128335315, "train_probe_signal/brier_reward/centered_abs_mean": 0.14373591418067613, "train_probe_signal/brier_reward/group_std_mean": 0.20871607214212418, "train_probe_signal/brier_reward/group_zero_std_frac": 0.0, "train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.07186795709033807, "train_probe_signal/brier_reward/weight": 0.5, "train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.07186795709033807, "train_probe_signal/confidence_one_or_zero/centered_abs_mean": 0.0, "train_probe_signal/confidence_one_or_zero/group_std_mean": 0.0, "train_probe_signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "train_probe_signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "train_probe_signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "train_probe_signal/format_reward/centered_abs_mean": 0.006727430348594983, "train_probe_signal/format_reward/group_std_mean": 0.019641855110724766, "train_probe_signal/format_reward/group_zero_std_frac": 0.8888889054457346, "train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0033637151742974916, "train_probe_signal/format_reward/weight": 0.5, "train_probe_signal/format_reward/weighted_centered_abs_mean": 0.0033637151742974916, "train_probe_signal/mean_confidence_reward/centered_abs_mean": 0.16852210462093353, "train_probe_signal/mean_confidence_reward/group_std_mean": 0.20680046826601028, "train_probe_signal/mean_confidence_reward/group_zero_std_frac": 0.0, "train_probe_signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 1.6852209986003193e-06, "train_probe_signal/mean_confidence_reward/weight": 9.999999747378752e-06, "train_probe_signal/mean_confidence_reward/weighted_centered_abs_mean": 1.6852209986003193e-06, "train_probe_steps_per_second": 0.033 }, { "calibration/aurc": 0.09349917406678779, "calibration/batch_distribution_entropy": 0.44226068472105906, "calibration/batch_entropy_100bins": 0.24420624657005888, "calibration/batch_entropy_10bins": 0.44226068472105906, "calibration/batch_entropy_50bins": 0.2874756422383079, "calibration/batch_uniqueness": -0.006146544850521329, "calibration/confidence_entropy": 0.43103626804361667, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.5820917645778938, "calibration/coverage@15%": 0.7831674281984334, "calibration/coverage@20%": 0.9045882288946909, "calibration/coverage@25%": 0.9618798955613578, "calibration/coverage@30%": 1.0, "calibration/coverage@5%": 0.35444816144473457, "calibration/distribution_entropy_10": 0.44226068472105906, "calibration/distribution_entropy_100": 0.24420624657005888, "calibration/ece": 0.09561221714534365, "calibration/mean_confidence": 0.7642822563098348, "calibration/unique_confidence_per_question": 0.020312499999999997, "calibration/unique_confidences": 7.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.001649305555555558, "completions/max_length": 3528.4, "completions/max_terminated_length": 3528.4, "completions/mean_length": 991.83125, "completions/mean_terminated_length": 993.4691162109375, "completions/min_length": 0.0, "completions/min_terminated_length": 271.6, "epoch": 1.8149038461538463, "grad_norm": 0.000718863622751087, "learning_rate": 1.7127403846153848e-06, "loss": -0.0004, "num_tokens": 2000126795.0, "reward": 1.3270102977752685, "reward_std": 0.09794525802135468, "rewards/accuracy_reward": 0.7756944537162781, "rewards/brier_reward": 0.8801336884498596, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9981770753860474, "rewards/mean_confidence_reward": 0.7755208492279053, "sampling/batch_mean_priority_error": 0.04309707667727515, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.1388888888888889, "sampling/error_ema_max": 0.23887455463409424, "sampling/error_ema_mean": 0.010810686834156513, "sampling/priority_kl": 0.029999897256493567, "sampling/priority_scale": 0.8023889839882031, "sampling/prob_entropy": 10.278952598571777, "sampling/prob_max": 5.8038592396769675e-05, "sampling/prob_min": 2.058104109892156e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.8072000026702881, "sampling/prompt_draws_total": 54216.0, "sampling/seen_fraction": 0.9149400115013122, "sampling/unseen_fraction": 0.08505998849868775, "signal/accuracy_reward/centered_abs_mean": 0.09638671875, "signal/accuracy_reward/group_std_mean": 0.12863395363092422, "signal/accuracy_reward/group_zero_std_frac": 0.6277777791023255, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.048193359375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.048193359375, "signal/advantage_abs_mean": 0.07153508141636848, "signal/advantage_pre_scale_abs_mean": 0.07153508141636848, "signal/advantage_pre_scale_std": 0.15592266619205475, "signal/advantage_std": 0.15592266619205475, "signal/brier_reward/centered_abs_mean": 0.06087609231472015, "signal/brier_reward/group_std_mean": 0.08291507065296173, "signal/brier_reward/group_zero_std_frac": 0.42777777910232545, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.030438046157360076, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.030438046157360076, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.0034559461520984767, "signal/format_reward/group_std_mean": 0.008465695334598422, "signal/format_reward/group_zero_std_frac": 0.9583333373069763, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0017279730760492384, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0017279730760492384, "signal/mean_confidence_reward/centered_abs_mean": 0.046497957408428194, "signal/mean_confidence_reward/group_std_mean": 0.06168990060687065, "signal/mean_confidence_reward/group_zero_std_frac": 0.475, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 4.649795300792903e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 4.649795300792903e-07, "step": 755 }, { "calibration/aurc": 0.14914645317799086, "calibration/batch_distribution_entropy": 0.4790925837613993, "calibration/batch_entropy_100bins": 0.26074205573889475, "calibration/batch_entropy_10bins": 0.4790925837613993, "calibration/batch_entropy_50bins": 0.30694132924471074, "calibration/batch_uniqueness": 0.06379253474589286, "calibration/confidence_entropy": 0.4314014961182906, "calibration/coverage@0%": 0.17604166666666668, "calibration/coverage@1%": 0.19479166666666667, "calibration/coverage@10%": 0.3557291666666667, "calibration/coverage@15%": 0.5234375, "calibration/coverage@20%": 0.5401041666666667, "calibration/coverage@25%": 0.8331320713664055, "calibration/coverage@30%": 0.8889809073107051, "calibration/coverage@5%": 0.3557291666666667, "calibration/distribution_entropy_10": 0.4790925837613993, "calibration/distribution_entropy_100": 0.26074205573889475, "calibration/ece": 0.1447995539599652, "calibration/mean_confidence": 0.7540081048738035, "calibration/unique_confidence_per_question": 0.019791666666666666, "calibration/unique_confidences": 7.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.002430555555555558, "completions/max_length": 3893.0, "completions/max_terminated_length": 3893.0, "completions/mean_length": 1057.9886352539063, "completions/mean_terminated_length": 1060.56220703125, "completions/min_length": 0.0, "completions/min_terminated_length": 274.0, "epoch": 1.8269230769230769, "grad_norm": 0.0006375846569426358, "learning_rate": 1.682692307692308e-06, "loss": -0.0004, "num_tokens": 2015390120.0, "reward": 1.3015799283981324, "reward_std": 0.10563021302223205, "rewards/accuracy_reward": 0.73125, "rewards/brier_reward": 0.8744123339653015, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9974826455116272, "rewards/mean_confidence_reward": 0.742907989025116, "sampling/batch_mean_priority_error": 0.04129415914572554, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.1388888888888889, "sampling/error_ema_max": 0.23887455463409424, "sampling/error_ema_mean": 0.010846528597176075, "sampling/priority_kl": 0.02999998964369297, "sampling/priority_scale": 0.8044263661140576, "sampling/prob_entropy": 10.278950119018555, "sampling/prob_max": 5.825789921800606e-05, "sampling/prob_min": 2.0604497694876046e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.8191999912261962, "sampling/prompt_draws_total": 54576.0, "sampling/seen_fraction": 0.9165599942207336, "sampling/unseen_fraction": 0.08344000577926636, "signal/accuracy_reward/centered_abs_mean": 0.1109917551279068, "signal/accuracy_reward/group_std_mean": 0.14689186215400696, "signal/accuracy_reward/group_zero_std_frac": 0.5833333492279053, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0554958775639534, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0554958775639534, "signal/advantage_abs_mean": 0.07737808376550674, "signal/advantage_pre_scale_abs_mean": 0.07737808376550674, "signal/advantage_pre_scale_std": 0.15911754965782166, "signal/advantage_std": 0.15911754965782166, "signal/brier_reward/centered_abs_mean": 0.06404606327414512, "signal/brier_reward/group_std_mean": 0.08760401755571365, "signal/brier_reward/group_zero_std_frac": 0.36111111640930177, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03202303163707256, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03202303163707256, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.004616970429196954, "signal/format_reward/group_std_mean": 0.00960054025053978, "signal/format_reward/group_zero_std_frac": 0.9583333373069763, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.002308485214598477, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.002308485214598477, "signal/mean_confidence_reward/centered_abs_mean": 0.05342720374464989, "signal/mean_confidence_reward/group_std_mean": 0.0703733816742897, "signal/mean_confidence_reward/group_zero_std_frac": 0.4, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.342720271528379e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.342720271528379e-07, "step": 760 }, { "calibration/aurc": 0.13740616837098713, "calibration/batch_distribution_entropy": 0.5073045708548226, "calibration/batch_entropy_100bins": 0.27755860925825987, "calibration/batch_entropy_10bins": 0.5073045708548226, "calibration/batch_entropy_50bins": 0.32673750395815093, "calibration/batch_uniqueness": 0.11712409118522035, "calibration/confidence_entropy": 0.437041303338047, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.5658283694220788, "calibration/coverage@15%": 0.5992897448954018, "calibration/coverage@20%": 0.6863547761199131, "calibration/coverage@25%": 0.7145042539267015, "calibration/coverage@30%": 0.9077553851174935, "calibration/coverage@5%": 0.3893215532286213, "calibration/distribution_entropy_10": 0.5073045708548226, "calibration/distribution_entropy_100": 0.27755860925825987, "calibration/ece": 0.10657638926861046, "calibration/mean_confidence": 0.733388931702505, "calibration/unique_confidence_per_question": 0.020833333333333332, "calibration/unique_confidences": 8.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.005642361111111094, "completions/max_length": 3775.6, "completions/max_terminated_length": 3775.6, "completions/mean_length": 1080.2004516601562, "completions/mean_terminated_length": 1086.2259155273437, "completions/min_length": 0.0, "completions/min_terminated_length": 301.2, "epoch": 1.8389423076923077, "grad_norm": 0.00046238666982389987, "learning_rate": 1.6526442307692309e-06, "loss": -0.0029, "num_tokens": 2030915565.0, "reward": 1.3130412101745605, "reward_std": 0.09514122605323791, "rewards/accuracy_reward": 0.7511284708976745, "rewards/brier_reward": 0.8805816173553467, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9943576455116272, "rewards/mean_confidence_reward": 0.7333853960037231, "sampling/batch_mean_priority_error": 0.03588200018134492, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.175, "sampling/error_ema_max": 0.23887455463409424, "sampling/error_ema_mean": 0.010885423980653286, "sampling/priority_kl": 0.030000252276659013, "sampling/priority_scale": 0.8076436937553808, "sampling/prob_entropy": 10.278957939147949, "sampling/prob_max": 5.85400739510078e-05, "sampling/prob_min": 2.0618402777472512e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.8312000036239624, "sampling/prompt_draws_total": 54936.0, "sampling/seen_fraction": 0.9185333371162414, "sampling/unseen_fraction": 0.08146666288375855, "signal/accuracy_reward/centered_abs_mean": 0.08905707374215126, "signal/accuracy_reward/group_std_mean": 0.1289079800248146, "signal/accuracy_reward/group_zero_std_frac": 0.5916666805744171, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04452853687107563, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04452853687107563, "signal/advantage_abs_mean": 0.06434951797127723, "signal/advantage_pre_scale_abs_mean": 0.06434951797127723, "signal/advantage_pre_scale_std": 0.14531429409980773, "signal/advantage_std": 0.14531429409980773, "signal/brier_reward/centered_abs_mean": 0.05801150500774384, "signal/brier_reward/group_std_mean": 0.08372148722410203, "signal/brier_reward/group_zero_std_frac": 0.34166666865348816, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02900575250387192, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.02900575250387192, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.007872178684920073, "signal/format_reward/group_std_mean": 0.01580634731799364, "signal/format_reward/group_zero_std_frac": 0.9277778029441833, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.003936089342460036, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.003936089342460036, "signal/mean_confidence_reward/centered_abs_mean": 0.05199273899197578, "signal/mean_confidence_reward/group_std_mean": 0.06956614181399345, "signal/mean_confidence_reward/group_zero_std_frac": 0.3694444537162781, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.199273573452956e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.199273573452956e-07, "step": 765 }, { "calibration/aurc": 0.1058074407278425, "calibration/batch_distribution_entropy": 0.5508122068033574, "calibration/batch_entropy_100bins": 0.30632343612808477, "calibration/batch_entropy_10bins": 0.5508122068033574, "calibration/batch_entropy_50bins": 0.36059899273830937, "calibration/batch_uniqueness": 0.24586654214986564, "calibration/confidence_entropy": 0.4377355294125821, "calibration/coverage@0%": 0.11830238726790451, "calibration/coverage@1%": 0.2645164864846147, "calibration/coverage@10%": 0.49349614293690847, "calibration/coverage@15%": 0.6703091203709031, "calibration/coverage@20%": 0.8773928715829582, "calibration/coverage@25%": 0.9046154478731934, "calibration/coverage@30%": 0.9287624295004899, "calibration/coverage@5%": 0.4709064704031346, "calibration/distribution_entropy_10": 0.5508122068033574, "calibration/distribution_entropy_100": 0.30632343612808477, "calibration/ece": 0.124238298273329, "calibration/mean_confidence": 0.7130441251754973, "calibration/unique_confidence_per_question": 0.02239583333333333, "calibration/unique_confidences": 8.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.003645833333333326, "completions/max_length": 3689.8, "completions/max_terminated_length": 3689.8, "completions/mean_length": 1063.927783203125, "completions/mean_terminated_length": 1067.8088623046874, "completions/min_length": 0.0, "completions/min_terminated_length": 272.4, "epoch": 1.8509615384615383, "grad_norm": 0.0005264647770673037, "learning_rate": 1.622596153846154e-06, "loss": -0.0032, "num_tokens": 2046281389.0, "reward": 1.2954176902770995, "reward_std": 0.10103552639484406, "rewards/accuracy_reward": 0.7413194298744201, "rewards/brier_reward": 0.8531475901603699, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9963541626930237, "rewards/mean_confidence_reward": 0.7126562595367432, "sampling/batch_mean_priority_error": 0.05059813466010541, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.14444444444444446, "sampling/error_ema_max": 0.23887455463409424, "sampling/error_ema_mean": 0.010923280008137226, "sampling/priority_kl": 0.02999989278614521, "sampling/priority_scale": 0.8107421695487573, "sampling/prob_entropy": 10.278948402404785, "sampling/prob_max": 5.8815087686525655e-05, "sampling/prob_min": 2.0632501764339396e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.8432000160217286, "sampling/prompt_draws_total": 55296.0, "sampling/seen_fraction": 0.9204333186149597, "sampling/unseen_fraction": 0.07956668138504028, "signal/accuracy_reward/centered_abs_mean": 0.10087890475988388, "signal/accuracy_reward/group_std_mean": 0.14286032617092131, "signal/accuracy_reward/group_zero_std_frac": 0.5611111164093018, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05043945237994194, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05043945237994194, "signal/advantage_abs_mean": 0.06797776594758034, "signal/advantage_pre_scale_abs_mean": 0.06797776594758034, "signal/advantage_pre_scale_std": 0.15065972805023192, "signal/advantage_std": 0.15065972805023192, "signal/brier_reward/centered_abs_mean": 0.06256764978170395, "signal/brier_reward/group_std_mean": 0.08869975209236144, "signal/brier_reward/group_zero_std_frac": 0.3194444477558136, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03128382489085198, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03128382489085198, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.006759982544463128, "signal/format_reward/group_std_mean": 0.015655810199677943, "signal/format_reward/group_zero_std_frac": 0.925000011920929, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.003379991272231564, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.003379991272231564, "signal/mean_confidence_reward/centered_abs_mean": 0.049361446499824525, "signal/mean_confidence_reward/group_std_mean": 0.06687793955206871, "signal/mean_confidence_reward/group_zero_std_frac": 0.35833333134651185, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 4.936144591738412e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 4.936144591738412e-07, "step": 770 }, { "calibration/aurc": 0.10568492932644938, "calibration/batch_distribution_entropy": 0.58030989898059, "calibration/batch_entropy_100bins": 0.3294897567753045, "calibration/batch_entropy_10bins": 0.58030989898059, "calibration/batch_entropy_50bins": 0.3878700105762888, "calibration/batch_uniqueness": 0.33344796337384425, "calibration/confidence_entropy": 0.4726254435927121, "calibration/coverage@0%": 0.10837696335078535, "calibration/coverage@1%": 0.1778963436381015, "calibration/coverage@10%": 0.5566325895366886, "calibration/coverage@15%": 0.7742783533429363, "calibration/coverage@20%": 0.8180596376602803, "calibration/coverage@25%": 0.8840599455040872, "calibration/coverage@30%": 0.934741144414169, "calibration/coverage@5%": 0.4609998417910961, "calibration/distribution_entropy_10": 0.58030989898059, "calibration/distribution_entropy_100": 0.3294897567753045, "calibration/ece": 0.11717980862289623, "calibration/mean_confidence": 0.7064151465581333, "calibration/unique_confidence_per_question": 0.021354166666666667, "calibration/unique_confidences": 8.2, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.005208333333333348, "completions/max_length": 3900.6, "completions/max_terminated_length": 3900.6, "completions/mean_length": 1092.501904296875, "completions/mean_terminated_length": 1098.26162109375, "completions/min_length": 0.0, "completions/min_terminated_length": 316.2, "epoch": 1.8629807692307692, "grad_norm": 0.00046158998156897724, "learning_rate": 1.592548076923077e-06, "loss": -0.0033, "num_tokens": 2061994467.0, "reward": 1.3138395309448243, "reward_std": 0.10530554354190827, "rewards/accuracy_reward": 0.7564236044883728, "rewards/brier_reward": 0.8764496445655823, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9947916507720947, "rewards/mean_confidence_reward": 0.7057465314865112, "sampling/batch_mean_priority_error": 0.029372394873767048, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.14166666666666666, "sampling/error_ema_max": 0.23887455463409424, "sampling/error_ema_mean": 0.010955540835857392, "sampling/priority_kl": 0.030000030994415283, "sampling/priority_scale": 0.8133450329536572, "sampling/prob_entropy": 10.278959846496582, "sampling/prob_max": 5.906694204895757e-05, "sampling/prob_min": 2.065128392132465e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.8552000045776367, "sampling/prompt_draws_total": 55656.0, "sampling/seen_fraction": 0.9221133351325989, "sampling/unseen_fraction": 0.07788666486740112, "signal/accuracy_reward/centered_abs_mean": 0.1110568568110466, "signal/accuracy_reward/group_std_mean": 0.15447987318038942, "signal/accuracy_reward/group_zero_std_frac": 0.5305555701255799, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0555284284055233, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0555284284055233, "signal/advantage_abs_mean": 0.07142583802342414, "signal/advantage_pre_scale_abs_mean": 0.07142583802342414, "signal/advantage_pre_scale_std": 0.1522584453225136, "signal/advantage_std": 0.1522584453225136, "signal/brier_reward/centered_abs_mean": 0.05967013090848923, "signal/brier_reward/group_std_mean": 0.08542276471853257, "signal/brier_reward/group_zero_std_frac": 0.2611111134290695, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.029835065454244615, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.029835065454244615, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.008289930713362991, "signal/format_reward/group_std_mean": 0.017449943348765373, "signal/format_reward/group_zero_std_frac": 0.919444453716278, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0041449653566814956, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0041449653566814956, "signal/mean_confidence_reward/centered_abs_mean": 0.052318794280290605, "signal/mean_confidence_reward/group_std_mean": 0.07218771576881408, "signal/mean_confidence_reward/group_zero_std_frac": 0.27222222089767456, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.231879185885191e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.231879185885191e-07, "step": 775 }, { "calibration/aurc": 0.12650340119612366, "calibration/batch_distribution_entropy": 0.6069324227226676, "calibration/batch_entropy_100bins": 0.3344824618210083, "calibration/batch_entropy_10bins": 0.6069324227226676, "calibration/batch_entropy_50bins": 0.3937473421748004, "calibration/batch_uniqueness": 0.3424567241129418, "calibration/confidence_entropy": 0.463840319597579, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.5450485483621087, "calibration/coverage@15%": 0.7180795302311593, "calibration/coverage@20%": 0.7525376109546658, "calibration/coverage@25%": 0.8271407543664193, "calibration/coverage@30%": 0.8626050167685081, "calibration/coverage@5%": 0.28555501112964154, "calibration/distribution_entropy_10": 0.6069324227226676, "calibration/distribution_entropy_100": 0.3344824618210083, "calibration/ece": 0.1031593465351158, "calibration/mean_confidence": 0.7074604731122898, "calibration/unique_confidence_per_question": 0.021875, "calibration/unique_confidences": 8.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.002430555555555558, "completions/max_length": 3569.0, "completions/max_terminated_length": 3569.0, "completions/mean_length": 1031.5218994140625, "completions/mean_terminated_length": 1034.0815063476562, "completions/min_length": 0.0, "completions/min_terminated_length": 288.8, "epoch": 1.875, "grad_norm": 0.0005521697457879782, "learning_rate": 1.5625e-06, "loss": -0.0004, "num_tokens": 2076971551.0, "reward": 1.3163609266281129, "reward_std": 0.09743509739637375, "rewards/accuracy_reward": 0.7552951455116272, "rewards/brier_reward": 0.8798429131507873, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9975694417953491, "rewards/mean_confidence_reward": 0.7181510210037232, "sampling/batch_mean_priority_error": 0.033419626889365825, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.11944444444444444, "sampling/error_ema_max": 0.23887455463409424, "sampling/error_ema_mean": 0.010983368568122386, "sampling/priority_kl": 0.03000000976026058, "sampling/priority_scale": 0.8154753029579297, "sampling/prob_entropy": 10.278954124450683, "sampling/prob_max": 5.929811304667964e-05, "sampling/prob_min": 2.067512359644752e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.8672000169754028, "sampling/prompt_draws_total": 56016.0, "sampling/seen_fraction": 0.9235666632652283, "sampling/unseen_fraction": 0.07643333673477173, "signal/accuracy_reward/centered_abs_mean": 0.10843641459941863, "signal/accuracy_reward/group_std_mean": 0.14697835743427276, "signal/accuracy_reward/group_zero_std_frac": 0.5666666626930237, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05421820729970932, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05421820729970932, "signal/advantage_abs_mean": 0.06979118883609772, "signal/advantage_pre_scale_abs_mean": 0.06979118883609772, "signal/advantage_pre_scale_std": 0.14512248635292052, "signal/advantage_std": 0.14512248635292052, "signal/brier_reward/centered_abs_mean": 0.054565370082855225, "signal/brier_reward/group_std_mean": 0.07615428417921066, "signal/brier_reward/group_zero_std_frac": 0.3194444477558136, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.027282685041427612, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.027282685041427612, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.0044596354942768816, "signal/format_reward/group_std_mean": 0.009759350214153528, "signal/format_reward/group_zero_std_frac": 0.9555555582046509, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0022298177471384408, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0022298177471384408, "signal/mean_confidence_reward/centered_abs_mean": 0.05360189527273178, "signal/mean_confidence_reward/group_std_mean": 0.07146198004484176, "signal/mean_confidence_reward/group_zero_std_frac": 0.32222222089767455, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.360189334169263e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.360189334169263e-07, "step": 780 }, { "calibration/aurc": 0.08575687912745038, "calibration/batch_distribution_entropy": 0.6169860530156981, "calibration/batch_entropy_100bins": 0.34476735909655454, "calibration/batch_entropy_10bins": 0.6169860530156981, "calibration/batch_entropy_50bins": 0.40585455683932975, "calibration/batch_uniqueness": 0.4083399131337545, "calibration/confidence_entropy": 0.477630784825506, "calibration/coverage@0%": 0.238339044821584, "calibration/coverage@1%": 0.238339044821584, "calibration/coverage@10%": 0.7210201090192845, "calibration/coverage@15%": 0.7512832669140214, "calibration/coverage@20%": 0.8411735605331867, "calibration/coverage@25%": 0.85213961797444, "calibration/coverage@30%": 0.9281544592551876, "calibration/coverage@5%": 0.6155446669873117, "calibration/distribution_entropy_10": 0.6169860530156981, "calibration/distribution_entropy_100": 0.34476735909655454, "calibration/ece": 0.13663992573633807, "calibration/mean_confidence": 0.6788759719550181, "calibration/unique_confidence_per_question": 0.021354166666666664, "calibration/unique_confidences": 8.2, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.003211805555555558, "completions/max_length": 3779.6, "completions/max_terminated_length": 3779.6, "completions/mean_length": 1086.3251708984376, "completions/mean_terminated_length": 1089.8551513671875, "completions/min_length": 0.0, "completions/min_terminated_length": 324.8, "epoch": 1.8870192307692308, "grad_norm": 0.0005370154976844788, "learning_rate": 1.5324519230769232e-06, "loss": -0.0026, "num_tokens": 2092606657.0, "reward": 1.2937704801559449, "reward_std": 0.09794444888830185, "rewards/accuracy_reward": 0.7217881798744201, "rewards/brier_reward": 0.8689505338668824, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9967881917953492, "rewards/mean_confidence_reward": 0.7033767580986023, "sampling/batch_mean_priority_error": 0.0366663303187036, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.11388888888888889, "sampling/error_ema_max": 0.23887455463409424, "sampling/error_ema_mean": 0.011012093536555767, "sampling/priority_kl": 0.029999838769435884, "sampling/priority_scale": 0.8172821938758716, "sampling/prob_entropy": 10.278954315185548, "sampling/prob_max": 5.9512741427170115e-05, "sampling/prob_min": 2.0624500029953198e-05, "sampling/prompt_draws_max": 7.2, "sampling/prompt_draws_mean": 1.8791999816894531, "sampling/prompt_draws_total": 56376.0, "sampling/seen_fraction": 0.9249000072479248, "sampling/unseen_fraction": 0.0750999927520752, "signal/accuracy_reward/centered_abs_mean": 0.10648328810930252, "signal/accuracy_reward/group_std_mean": 0.14075976759195327, "signal/accuracy_reward/group_zero_std_frac": 0.5916666626930237, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05324164405465126, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05324164405465126, "signal/advantage_abs_mean": 0.07044520229101181, "signal/advantage_pre_scale_abs_mean": 0.07044520229101181, "signal/advantage_pre_scale_std": 0.14834425449371338, "signal/advantage_std": 0.14834425449371338, "signal/brier_reward/centered_abs_mean": 0.06095951944589615, "signal/brier_reward/group_std_mean": 0.08446675837039948, "signal/brier_reward/group_zero_std_frac": 0.2888888955116272, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.030479759722948075, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.030479759722948075, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.00603841149713844, "signal/format_reward/group_std_mean": 0.014342158287763595, "signal/format_reward/group_zero_std_frac": 0.9305555701255799, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00301920574856922, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00301920574856922, "signal/mean_confidence_reward/centered_abs_mean": 0.055245233327150346, "signal/mean_confidence_reward/group_std_mean": 0.07242218255996705, "signal/mean_confidence_reward/group_zero_std_frac": 0.30833333134651186, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.524522975974832e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.524522975974832e-07, "step": 785 }, { "calibration/aurc": 0.09134358211133084, "calibration/batch_distribution_entropy": 0.5184122528279234, "calibration/batch_entropy_100bins": 0.28326808197942577, "calibration/batch_entropy_10bins": 0.5184122528279234, "calibration/batch_entropy_50bins": 0.3334586028670128, "calibration/batch_uniqueness": 0.1333611226039658, "calibration/confidence_entropy": 0.43366741381902807, "calibration/coverage@0%": 0.13507853403141362, "calibration/coverage@1%": 0.39809936736474694, "calibration/coverage@10%": 0.5081451788830715, "calibration/coverage@15%": 0.5822916666666667, "calibration/coverage@20%": 0.9163585296684118, "calibration/coverage@25%": 0.9555219240837696, "calibration/coverage@30%": 0.9910994764397906, "calibration/coverage@5%": 0.49872109511343804, "calibration/distribution_entropy_10": 0.5184122528279234, "calibration/distribution_entropy_100": 0.28326808197942577, "calibration/ece": 0.13128326788830713, "calibration/mean_confidence": 0.750606457242583, "calibration/unique_confidence_per_question": 0.021875, "calibration/unique_confidences": 8.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.003819444444444442, "completions/max_length": 3623.6, "completions/max_terminated_length": 3623.6, "completions/mean_length": 1019.901220703125, "completions/mean_terminated_length": 1023.8256713867188, "completions/min_length": 0.0, "completions/min_terminated_length": 314.8, "epoch": 1.8990384615384617, "grad_norm": 0.0005847892607562244, "learning_rate": 1.5024038461538462e-06, "loss": -0.0026, "num_tokens": 2107456751.0, "reward": 1.3047247171401977, "reward_std": 0.09398773610591889, "rewards/accuracy_reward": 0.7427083373069763, "rewards/brier_reward": 0.8705459713935852, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9961805462837219, "rewards/mean_confidence_reward": 0.726623260974884, "sampling/batch_mean_priority_error": 0.03973864163030571, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.16666666666666669, "sampling/error_ema_max": 0.23887455463409424, "sampling/error_ema_mean": 0.011049842275679112, "sampling/priority_kl": 0.030000048875808715, "sampling/priority_scale": 0.8210367739433423, "sampling/prob_entropy": 10.27895221710205, "sampling/prob_max": 5.9824989875778556e-05, "sampling/prob_min": 2.0322882119216956e-05, "sampling/prompt_draws_max": 8.0, "sampling/prompt_draws_mean": 1.8911999940872193, "sampling/prompt_draws_total": 56736.0, "sampling/seen_fraction": 0.926853334903717, "sampling/unseen_fraction": 0.07314666509628295, "signal/accuracy_reward/centered_abs_mean": 0.09969618171453476, "signal/accuracy_reward/group_std_mean": 0.1376244455575943, "signal/accuracy_reward/group_zero_std_frac": 0.5861111283302307, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04984809085726738, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04984809085726738, "signal/advantage_abs_mean": 0.06447783783078194, "signal/advantage_pre_scale_abs_mean": 0.06447783783078194, "signal/advantage_pre_scale_std": 0.1427443265914917, "signal/advantage_std": 0.1427443265914917, "signal/brier_reward/centered_abs_mean": 0.052842124551534655, "signal/brier_reward/group_std_mean": 0.07576082348823547, "signal/brier_reward/group_zero_std_frac": 0.36944444179534913, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.026421062275767328, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.026421062275767328, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.006987847248092293, "signal/format_reward/group_std_mean": 0.015508862398564815, "signal/format_reward/group_zero_std_frac": 0.9277777791023254, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0034939236240461467, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0034939236240461467, "signal/mean_confidence_reward/centered_abs_mean": 0.04846192449331284, "signal/mean_confidence_reward/group_std_mean": 0.064261145144701, "signal/mean_confidence_reward/group_zero_std_frac": 0.39166667461395266, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 4.846192496188451e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 4.846192496188451e-07, "step": 790 }, { "calibration/aurc": 0.11700290971783592, "calibration/batch_distribution_entropy": 0.3694812329975735, "calibration/batch_entropy_100bins": 0.19917089474001431, "calibration/batch_entropy_10bins": 0.3694812329975735, "calibration/batch_entropy_50bins": 0.2344607547298671, "calibration/batch_uniqueness": -0.22014402630499075, "calibration/confidence_entropy": 0.39032257527187286, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.1828125, "calibration/coverage@10%": 0.5609375, "calibration/coverage@15%": 0.7394718233246301, "calibration/coverage@20%": 0.7785900783289816, "calibration/coverage@25%": 0.7885117493472584, "calibration/coverage@30%": 0.9650130548302872, "calibration/coverage@5%": 0.5229166666666666, "calibration/distribution_entropy_10": 0.3694812329975735, "calibration/distribution_entropy_100": 0.19917089474001431, "calibration/ece": 0.11351827676240207, "calibration/mean_confidence": 0.8136020452567451, "calibration/unique_confidence_per_question": 0.020833333333333332, "calibration/unique_confidences": 8.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0012152777777777902, "completions/max_length": 3386.6, "completions/max_terminated_length": 3386.6, "completions/mean_length": 1010.8636352539063, "completions/mean_terminated_length": 1012.0790161132812, "completions/min_length": 0.0, "completions/min_terminated_length": 291.4, "epoch": 1.9110576923076923, "grad_norm": 0.0005554448580369353, "learning_rate": 1.4723557692307693e-06, "loss": -0.0008, "num_tokens": 2122168588.0, "reward": 1.321057105064392, "reward_std": 0.09689114838838578, "rewards/accuracy_reward": 0.7663194417953492, "rewards/brier_reward": 0.8771684169769287, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9986111164093018, "rewards/mean_confidence_reward": 0.7633159756660461, "sampling/batch_mean_priority_error": 0.04098942885678015, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.1388888888888889, "sampling/error_ema_max": 0.23887455463409424, "sampling/error_ema_mean": 0.011086820624768734, "sampling/priority_kl": 0.029999766498804092, "sampling/priority_scale": 0.8243461310630664, "sampling/prob_entropy": 10.278958702087403, "sampling/prob_max": 6.011749355820939e-05, "sampling/prob_min": 2.0333567590569146e-05, "sampling/prompt_draws_max": 8.0, "sampling/prompt_draws_mean": 1.9031999826431274, "sampling/prompt_draws_total": 57096.0, "sampling/seen_fraction": 0.9286133170127868, "sampling/unseen_fraction": 0.07138668298721314, "signal/accuracy_reward/centered_abs_mean": 0.0936957448720932, "signal/accuracy_reward/group_std_mean": 0.1331271708011627, "signal/accuracy_reward/group_zero_std_frac": 0.580555546283722, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0468478724360466, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0468478724360466, "signal/advantage_abs_mean": 0.06651908457279206, "signal/advantage_pre_scale_abs_mean": 0.06651908457279206, "signal/advantage_pre_scale_std": 0.14965884685516356, "signal/advantage_std": 0.14965884685516356, "signal/brier_reward/centered_abs_mean": 0.05872168764472008, "signal/brier_reward/group_std_mean": 0.08186099231243134, "signal/brier_reward/group_zero_std_frac": 0.4083333373069763, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02936084382236004, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.02936084382236004, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.0026692707906477153, "signal/format_reward/group_std_mean": 0.007258860021829605, "signal/format_reward/group_zero_std_frac": 0.9611111044883728, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0013346353953238577, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0013346353953238577, "signal/mean_confidence_reward/centered_abs_mean": 0.043116336315870286, "signal/mean_confidence_reward/group_std_mean": 0.057510948926210406, "signal/mean_confidence_reward/group_zero_std_frac": 0.45555556416511533, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 4.311633404086024e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 4.311633404086024e-07, "step": 795 }, { "calibration/aurc": 0.0531468315919163, "calibration/batch_distribution_entropy": 0.39870880529696723, "calibration/batch_entropy_100bins": 0.2124436541205837, "calibration/batch_entropy_10bins": 0.39870880529696723, "calibration/batch_entropy_50bins": 0.2500852323212258, "calibration/batch_uniqueness": -0.1439639447652643, "calibration/confidence_entropy": 0.4015295033087928, "calibration/coverage@0%": 0.13263707571801567, "calibration/coverage@1%": 0.32384954308093994, "calibration/coverage@10%": 0.6923566545279648, "calibration/coverage@15%": 0.9225445896889746, "calibration/coverage@20%": 0.9765013054830287, "calibration/coverage@25%": 0.9926892950391645, "calibration/coverage@30%": 1.0, "calibration/coverage@5%": 0.6614465668086665, "calibration/distribution_entropy_10": 0.39870880529696723, "calibration/distribution_entropy_100": 0.2124436541205837, "calibration/ece": 0.11906999650726022, "calibration/mean_confidence": 0.8056827060716415, "calibration/unique_confidence_per_question": 0.019791666666666666, "calibration/unique_confidences": 7.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0014756944444444641, "completions/max_length": 3871.0, "completions/max_terminated_length": 3871.0, "completions/mean_length": 1022.5205078125, "completions/mean_terminated_length": 1024.047265625, "completions/min_length": 0.0, "completions/min_terminated_length": 298.8, "epoch": 1.9230769230769231, "grad_norm": 0.0007274962845258415, "learning_rate": 1.4423076923076922e-06, "loss": -0.0007, "num_tokens": 2137044056.0, "reward": 1.3031952381134033, "reward_std": 0.11121167093515397, "rewards/accuracy_reward": 0.7424479246139526, "rewards/brier_reward": 0.8657499551773071, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9981770873069763, "rewards/mean_confidence_reward": 0.7726735830307007, "sampling/batch_mean_priority_error": 0.05381679004621047, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.125, "sampling/error_ema_max": 0.23887455463409424, "sampling/error_ema_mean": 0.011131892912089825, "sampling/priority_kl": 0.030000000819563867, "sampling/priority_scale": 0.8267765342490747, "sampling/prob_entropy": 10.278957557678222, "sampling/prob_max": 6.0364877572283146e-05, "sampling/prob_min": 2.0352091814856978e-05, "sampling/prompt_draws_max": 8.0, "sampling/prompt_draws_mean": 1.9151999950408936, "sampling/prompt_draws_total": 57456.0, "sampling/seen_fraction": 0.9300533413887024, "sampling/unseen_fraction": 0.0699466586112976, "signal/accuracy_reward/centered_abs_mean": 0.10494249016046524, "signal/accuracy_reward/group_std_mean": 0.1474722906947136, "signal/accuracy_reward/group_zero_std_frac": 0.544444453716278, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05247124508023262, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05247124508023262, "signal/advantage_abs_mean": 0.07756313532590867, "signal/advantage_pre_scale_abs_mean": 0.07756313532590867, "signal/advantage_pre_scale_std": 0.16611856520175933, "signal/advantage_std": 0.16611856520175933, "signal/brier_reward/centered_abs_mean": 0.06642551645636559, "signal/brier_reward/group_std_mean": 0.09464680701494217, "signal/brier_reward/group_zero_std_frac": 0.3694444477558136, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.033212758228182794, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.033212758228182794, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.0033691405667923393, "signal/format_reward/group_std_mean": 0.007633072696626187, "signal/format_reward/group_zero_std_frac": 0.9638888835906982, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0016845702833961696, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0016845702833961696, "signal/mean_confidence_reward/centered_abs_mean": 0.0432563029229641, "signal/mean_confidence_reward/group_std_mean": 0.060444879531860354, "signal/mean_confidence_reward/group_zero_std_frac": 0.4222222328186035, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 4.3256300159555393e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 4.3256300159555393e-07, "step": 800 }, { "epoch": 1.9230769230769231, "eval_calibration/aurc": 0.15189747488764554, "eval_calibration/batch_distribution_entropy": 0.452406431145014, "eval_calibration/batch_entropy_100bins": 0.24782939662713582, "eval_calibration/batch_entropy_10bins": 0.452406431145014, "eval_calibration/batch_entropy_50bins": 0.2917407558634221, "eval_calibration/batch_uniqueness": -0.05053761814744801, "eval_calibration/confidence_entropy": 0.4086416396546222, "eval_calibration/coverage@0%": 0.0, "eval_calibration/coverage@1%": 0.0, "eval_calibration/coverage@10%": 0.0, "eval_calibration/coverage@15%": 0.7634782608695653, "eval_calibration/coverage@20%": 0.82, "eval_calibration/coverage@25%": 0.9434782608695652, "eval_calibration/coverage@30%": 1.0, "eval_calibration/coverage@5%": 0.0, "eval_calibration/distribution_entropy_10": 0.452406431145014, "eval_calibration/distribution_entropy_100": 0.24782939662713582, "eval_calibration/ece": 0.06547826086956546, "eval_calibration/mean_confidence": 0.7889565217391306, "eval_calibration/unique_confidence_per_question": 0.0078125, "eval_calibration/unique_confidences": 9, "eval_completions/clipped_ratio": 0.0017361111111111234, "eval_completions/max_length": 2835.6666666666665, "eval_completions/max_terminated_length": 2835.6666666666665, "eval_completions/mean_length": 1001.4951680501302, "eval_completions/mean_terminated_length": 1003.2116902669271, "eval_completions/min_length": 236.66666666666666, "eval_completions/min_terminated_length": 330.1666666666667, "eval_loss": 0.0, "eval_num_tokens": 2137044056.0, "eval_reward": 1.2835538585980732, "eval_reward_std": 0.32838855187098187, "eval_rewards/accuracy_reward": 0.7222222288449606, "eval_rewards/brier_reward": 0.8466059267520905, "eval_rewards/confidence_one_or_zero": 0.0, "eval_rewards/format_reward": 0.9982638955116272, "eval_rewards/mean_confidence_reward": 0.7875867386658987, "eval_runtime": 181.7797, "eval_samples_per_second": 5.501, "eval_signal/accuracy_reward/centered_abs_mean": 0.3897569427887599, "eval_signal/accuracy_reward/group_std_mean": 0.4464738667011261, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.19487847139437994, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.19487847139437994, "eval_signal/advantage_abs_mean": 0.27775069574515027, "eval_signal/advantage_pre_scale_abs_mean": 0.27775069574515027, "eval_signal/advantage_pre_scale_std": 0.3263417234023412, "eval_signal/advantage_std": 0.3263417234023412, "eval_signal/brier_reward/centered_abs_mean": 0.1897987350821495, "eval_signal/brier_reward/group_std_mean": 0.25104477753241855, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.09489936754107475, "eval_signal/brier_reward/weight": 0.5, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.09489936754107475, "eval_signal/confidence_one_or_zero/centered_abs_mean": 0.0, "eval_signal/confidence_one_or_zero/group_std_mean": 0.0, "eval_signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "eval_signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "eval_signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/centered_abs_mean": 0.0033637151742974916, "eval_signal/format_reward/group_std_mean": 0.009820927555362383, "eval_signal/format_reward/group_zero_std_frac": 0.944444457689921, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0016818575871487458, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0016818575871487458, "eval_signal/mean_confidence_reward/centered_abs_mean": 0.15858287115891775, "eval_signal/mean_confidence_reward/group_std_mean": 0.20427095144987106, "eval_signal/mean_confidence_reward/group_zero_std_frac": 0.0, "eval_signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 1.585828651210856e-06, "eval_signal/mean_confidence_reward/weight": 9.999999747378752e-06, "eval_signal/mean_confidence_reward/weighted_centered_abs_mean": 1.585828651210856e-06, "eval_steps_per_second": 0.033, "step": 800 }, { "epoch": 1.9230769230769231, "step": 800, "train_probe_calibration/aurc": 0.09918878298184919, "train_probe_calibration/batch_distribution_entropy": 0.4207500172518936, "train_probe_calibration/batch_entropy_100bins": 0.2282892016326656, "train_probe_calibration/batch_entropy_10bins": 0.4207500172518936, "train_probe_calibration/batch_entropy_50bins": 0.2687383544736379, "train_probe_calibration/batch_uniqueness": -0.12686578449905483, "train_probe_calibration/confidence_entropy": 0.4009151837695988, "train_probe_calibration/coverage@0%": 0.0, "train_probe_calibration/coverage@1%": 0.0, "train_probe_calibration/coverage@10%": 0.7878260869565218, "train_probe_calibration/coverage@15%": 0.8956521739130435, "train_probe_calibration/coverage@20%": 0.9808695652173913, "train_probe_calibration/coverage@25%": 1.0, "train_probe_calibration/coverage@30%": 1.0, "train_probe_calibration/coverage@5%": 0.0, "train_probe_calibration/distribution_entropy_10": 0.4207500172518936, "train_probe_calibration/distribution_entropy_100": 0.2282892016326656, "train_probe_calibration/ece": 0.03947826086956502, "train_probe_calibration/mean_confidence": 0.8020869565217393, "train_probe_calibration/unique_confidence_per_question": 0.006944444444444444, "train_probe_calibration/unique_confidences": 8, "train_probe_completions/clipped_ratio": 0.0017361111111111234, "train_probe_completions/max_length": 2715.1666666666665, "train_probe_completions/max_terminated_length": 2715.1666666666665, "train_probe_completions/mean_length": 1000.1415100097656, "train_probe_completions/mean_terminated_length": 1001.8871358235677, "train_probe_completions/min_length": 197.66666666666666, "train_probe_completions/min_terminated_length": 261.6666666666667, "train_probe_loss": 0.0, "train_probe_num_tokens": 2137044056.0, "train_probe_reward": 1.3348864515622456, "train_probe_reward_std": 0.29340456426143646, "train_probe_rewards/accuracy_reward": 0.7881944378217062, "train_probe_rewards/brier_reward": 0.8832986156145731, "train_probe_rewards/confidence_one_or_zero": 0.0, "train_probe_rewards/format_reward": 0.9982638955116272, "train_probe_rewards/mean_confidence_reward": 0.8006944358348846, "train_probe_runtime": 168.1813, "train_probe_samples_per_second": 5.946, "train_probe_signal/accuracy_reward/centered_abs_mean": 0.3246527711550395, "train_probe_signal/accuracy_reward/group_std_mean": 0.4065794100364049, "train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0, "train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.16232638557751974, "train_probe_signal/accuracy_reward/weight": 0.5, "train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.16232638557751974, "train_probe_signal/advantage_abs_mean": 0.2283279299736023, "train_probe_signal/advantage_pre_scale_abs_mean": 0.2283279299736023, "train_probe_signal/advantage_pre_scale_std": 0.2924281011025111, "train_probe_signal/advantage_std": 0.2924281011025111, "train_probe_signal/brier_reward/centered_abs_mean": 0.15074980755647024, "train_probe_signal/brier_reward/group_std_mean": 0.21790056924025217, "train_probe_signal/brier_reward/group_zero_std_frac": 0.0, "train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.07537490377823512, "train_probe_signal/brier_reward/weight": 0.5, "train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.07537490377823512, "train_probe_signal/confidence_one_or_zero/centered_abs_mean": 0.0, "train_probe_signal/confidence_one_or_zero/group_std_mean": 0.0, "train_probe_signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "train_probe_signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "train_probe_signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "train_probe_signal/format_reward/centered_abs_mean": 0.0033637151742974916, "train_probe_signal/format_reward/group_std_mean": 0.009820927555362383, "train_probe_signal/format_reward/group_zero_std_frac": 0.944444457689921, "train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0016818575871487458, "train_probe_signal/format_reward/weight": 0.5, "train_probe_signal/format_reward/weighted_centered_abs_mean": 0.0016818575871487458, "train_probe_signal/mean_confidence_reward/centered_abs_mean": 0.1441731428106626, "train_probe_signal/mean_confidence_reward/group_std_mean": 0.19188025345404944, "train_probe_signal/mean_confidence_reward/group_zero_std_frac": 0.0, "train_probe_signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 1.4417313991543779e-06, "train_probe_signal/mean_confidence_reward/weight": 9.999999747378752e-06, "train_probe_signal/mean_confidence_reward/weighted_centered_abs_mean": 1.4417313991543779e-06, "train_probe_steps_per_second": 0.036 }, { "calibration/aurc": 0.18385633368569074, "calibration/batch_distribution_entropy": 0.44733582079772455, "calibration/batch_entropy_100bins": 0.24339174978119438, "calibration/batch_entropy_10bins": 0.44733582079772455, "calibration/batch_entropy_50bins": 0.2865168297969042, "calibration/batch_uniqueness": -0.033245663910445734, "calibration/confidence_entropy": 0.4115919705930738, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.35045963881636205, "calibration/coverage@15%": 0.3666476283724978, "calibration/coverage@20%": 0.5364583333333333, "calibration/coverage@25%": 0.7046875, "calibration/coverage@30%": 0.8723958333333334, "calibration/coverage@5%": 0.31394827023498695, "calibration/distribution_entropy_10": 0.44733582079772455, "calibration/distribution_entropy_100": 0.24339174978119438, "calibration/ece": 0.13457503807658838, "calibration/mean_confidence": 0.779022655570061, "calibration/unique_confidence_per_question": 0.0203125, "calibration/unique_confidences": 7.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0006944444444444642, "completions/max_length": 3408.8, "completions/max_terminated_length": 3408.8, "completions/mean_length": 983.4182373046875, "completions/mean_terminated_length": 984.1210205078125, "completions/min_length": 64.2, "completions/min_terminated_length": 303.0, "epoch": 1.9350961538461537, "grad_norm": 0.0008335274760611355, "learning_rate": 1.4122596153846154e-06, "loss": 0.0011, "num_tokens": 2151477610.0, "reward": 1.3270015478134156, "reward_std": 0.11657609343528748, "rewards/accuracy_reward": 0.7857638955116272, "rewards/brier_reward": 0.8691779732704162, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9990451216697693, "rewards/mean_confidence_reward": 0.8054861068725586, "sampling/batch_mean_priority_error": 0.054985726086469545, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.10833333333333332, "sampling/error_ema_max": 0.23887455463409424, "sampling/error_ema_mean": 0.0111892344430089, "sampling/priority_kl": 0.02999982424080372, "sampling/priority_scale": 0.8299192130332813, "sampling/prob_entropy": 10.278948402404785, "sampling/prob_max": 6.065202906029299e-05, "sampling/prob_min": 2.03645795409102e-05, "sampling/prompt_draws_max": 8.0, "sampling/prompt_draws_mean": 1.9272000074386597, "sampling/prompt_draws_total": 57816.0, "sampling/seen_fraction": 0.9316666722297668, "sampling/unseen_fraction": 0.06833332777023315, "signal/accuracy_reward/centered_abs_mean": 0.11029731184244156, "signal/accuracy_reward/group_std_mean": 0.15226011127233505, "signal/accuracy_reward/group_zero_std_frac": 0.5416666686534881, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05514865592122078, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05514865592122078, "signal/advantage_abs_mean": 0.08358793258666992, "signal/advantage_pre_scale_abs_mean": 0.08358793258666992, "signal/advantage_pre_scale_std": 0.1721019297838211, "signal/advantage_std": 0.1721019297838211, "signal/brier_reward/centered_abs_mean": 0.07183887735009194, "signal/brier_reward/group_std_mean": 0.09882517009973527, "signal/brier_reward/group_zero_std_frac": 0.40555556416511535, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03591943867504597, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03591943867504597, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.0018500433885492384, "signal/format_reward/group_std_mean": 0.005401509813964367, "signal/format_reward/group_zero_std_frac": 0.9694444417953492, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0009250216942746192, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0009250216942746192, "signal/mean_confidence_reward/centered_abs_mean": 0.04548124074935913, "signal/mean_confidence_reward/group_std_mean": 0.060777968168258666, "signal/mean_confidence_reward/group_zero_std_frac": 0.4666666746139526, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 4.548124081793503e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 4.548124081793503e-07, "step": 805 }, { "calibration/aurc": 0.17190273395851366, "calibration/batch_distribution_entropy": 0.4751986455352262, "calibration/batch_entropy_100bins": 0.2599899300001808, "calibration/batch_entropy_10bins": 0.4751986455352262, "calibration/batch_entropy_50bins": 0.30605593899438904, "calibration/batch_uniqueness": 0.04454210069444448, "calibration/confidence_entropy": 0.4235046630804894, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.2958333333333333, "calibration/coverage@15%": 0.484375, "calibration/coverage@20%": 0.646875, "calibration/coverage@25%": 0.8208333333333334, "calibration/coverage@30%": 0.9036458333333334, "calibration/coverage@5%": 0.0, "calibration/distribution_entropy_10": 0.4751986455352262, "calibration/distribution_entropy_100": 0.2599899300001808, "calibration/ece": 0.11010416666666667, "calibration/mean_confidence": 0.7810416666666667, "calibration/unique_confidence_per_question": 0.019791666666666666, "calibration/unique_confidences": 7.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0009548611111111161, "completions/max_length": 3496.4, "completions/max_terminated_length": 3496.4, "completions/mean_length": 987.4782958984375, "completions/mean_terminated_length": 988.4582153320313, "completions/min_length": 49.8, "completions/min_terminated_length": 274.6, "epoch": 1.9471153846153846, "grad_norm": 0.0006270384765230119, "learning_rate": 1.3822115384615387e-06, "loss": 0.0001, "num_tokens": 2165978512.0, "reward": 1.3170762062072754, "reward_std": 0.10379236936569214, "rewards/accuracy_reward": 0.7657117962837219, "rewards/brier_reward": 0.8693793535232544, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9990451335906982, "rewards/mean_confidence_reward": 0.8073871612548829, "sampling/batch_mean_priority_error": 0.055473721058377626, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.12222222222222223, "sampling/error_ema_max": 0.23887455463409424, "sampling/error_ema_mean": 0.011242944747209549, "sampling/priority_kl": 0.029999969899654387, "sampling/priority_scale": 0.8319486677879467, "sampling/prob_entropy": 10.278951454162598, "sampling/prob_max": 6.088290538173169e-05, "sampling/prob_min": 2.0387604672578162e-05, "sampling/prompt_draws_max": 8.0, "sampling/prompt_draws_mean": 1.9391999959945678, "sampling/prompt_draws_total": 58176.0, "sampling/seen_fraction": 0.9329066634178161, "sampling/unseen_fraction": 0.06709333658218383, "signal/accuracy_reward/centered_abs_mean": 0.09194336086511612, "signal/accuracy_reward/group_std_mean": 0.12984519004821776, "signal/accuracy_reward/group_zero_std_frac": 0.6055555701255798, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04597168043255806, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04597168043255806, "signal/advantage_abs_mean": 0.07197851687669754, "signal/advantage_pre_scale_abs_mean": 0.07197851687669754, "signal/advantage_pre_scale_std": 0.1576433524489403, "signal/advantage_std": 0.1576433524489403, "signal/brier_reward/centered_abs_mean": 0.06384159326553344, "signal/brier_reward/group_std_mean": 0.09090788215398789, "signal/brier_reward/group_zero_std_frac": 0.45555556416511533, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03192079663276672, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03192079663276672, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.0018066406017169356, "signal/format_reward/group_std_mean": 0.004452054016292095, "signal/format_reward/group_zero_std_frac": 0.9777777791023254, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0009033203008584678, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0009033203008584678, "signal/mean_confidence_reward/centered_abs_mean": 0.04468643441796303, "signal/mean_confidence_reward/group_std_mean": 0.05934521555900574, "signal/mean_confidence_reward/group_zero_std_frac": 0.522222226858139, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 4.4686433966489856e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 4.4686433966489856e-07, "step": 810 }, { "calibration/aurc": 0.19579915894519634, "calibration/batch_distribution_entropy": 0.4647881882032324, "calibration/batch_entropy_100bins": 0.24840480950407517, "calibration/batch_entropy_10bins": 0.4647881882032324, "calibration/batch_entropy_50bins": 0.29241812259205274, "calibration/batch_uniqueness": 0.00656296614868152, "calibration/confidence_entropy": 0.41222186254121224, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.37643979057591626, "calibration/coverage@15%": 0.4, "calibration/coverage@20%": 0.4, "calibration/coverage@25%": 0.6817708333333333, "calibration/coverage@30%": 0.6921875, "calibration/coverage@5%": 0.13802083333333331, "calibration/distribution_entropy_10": 0.4647881882032324, "calibration/distribution_entropy_100": 0.24840480950407517, "calibration/ece": 0.1426440068247372, "calibration/mean_confidence": 0.8000327132573739, "calibration/unique_confidence_per_question": 0.020833333333333332, "calibration/unique_confidences": 8.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0011284722222222542, "completions/max_length": 3592.4, "completions/max_terminated_length": 3592.4, "completions/mean_length": 1039.9433349609376, "completions/mean_terminated_length": 1041.126171875, "completions/min_length": 0.0, "completions/min_terminated_length": 300.8, "epoch": 1.9591346153846154, "grad_norm": 0.0007290610228665173, "learning_rate": 1.3521634615384617e-06, "loss": 0.0003, "num_tokens": 2181050851.0, "reward": 1.3023419618606566, "reward_std": 0.11788448095321655, "rewards/accuracy_reward": 0.745225703716278, "rewards/brier_reward": 0.8608315944671631, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9986111044883728, "rewards/mean_confidence_reward": 0.7790451526641846, "sampling/batch_mean_priority_error": 0.05542332055228545, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.125, "sampling/error_ema_max": 0.23887455463409424, "sampling/error_ema_mean": 0.01129895206540823, "sampling/priority_kl": 0.0299999862909317, "sampling/priority_scale": 0.8350183546775952, "sampling/prob_entropy": 10.278951072692871, "sampling/prob_max": 6.1165435181465e-05, "sampling/prob_min": 2.0399702407303266e-05, "sampling/prompt_draws_max": 8.0, "sampling/prompt_draws_mean": 1.951200008392334, "sampling/prompt_draws_total": 58536.0, "sampling/seen_fraction": 0.9344400048255921, "sampling/unseen_fraction": 0.06555999517440796, "signal/accuracy_reward/centered_abs_mean": 0.10779622346162795, "signal/accuracy_reward/group_std_mean": 0.15116337835788726, "signal/accuracy_reward/group_zero_std_frac": 0.5388888955116272, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05389811173081398, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05389811173081398, "signal/advantage_abs_mean": 0.08279699683189393, "signal/advantage_pre_scale_abs_mean": 0.08279699683189393, "signal/advantage_pre_scale_std": 0.171286541223526, "signal/advantage_std": 0.171286541223526, "signal/brier_reward/centered_abs_mean": 0.07373405247926712, "signal/brier_reward/group_std_mean": 0.10261615216732026, "signal/brier_reward/group_zero_std_frac": 0.3638888895511627, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03686702623963356, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03686702623963356, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.002604166604578495, "signal/format_reward/group_std_mean": 0.005957829533144832, "signal/format_reward/group_zero_std_frac": 0.9722222089767456, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0013020833022892476, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0013020833022892476, "signal/mean_confidence_reward/centered_abs_mean": 0.0520480789244175, "signal/mean_confidence_reward/group_std_mean": 0.06866604089736938, "signal/mean_confidence_reward/group_zero_std_frac": 0.42222222685813904, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.204807735026406e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.204807735026406e-07, "step": 815 }, { "calibration/aurc": 0.09833870413235231, "calibration/batch_distribution_entropy": 0.4614352913132178, "calibration/batch_entropy_100bins": 0.2502082935466154, "calibration/batch_entropy_10bins": 0.4614352913132178, "calibration/batch_entropy_50bins": 0.2945411548268037, "calibration/batch_uniqueness": 0.04135342083208417, "calibration/confidence_entropy": 0.4304910644689368, "calibration/coverage@0%": 0.29010416666666666, "calibration/coverage@1%": 0.303125, "calibration/coverage@10%": 0.5684670275590551, "calibration/coverage@15%": 0.8559728953797551, "calibration/coverage@20%": 0.9035079288391824, "calibration/coverage@25%": 0.9196850393700787, "calibration/coverage@30%": 0.9417322834645668, "calibration/coverage@5%": 0.3265625, "calibration/distribution_entropy_10": 0.4614352913132178, "calibration/distribution_entropy_100": 0.2502082935466154, "calibration/ece": 0.09957528756604497, "calibration/mean_confidence": 0.7683144042577251, "calibration/unique_confidence_per_question": 0.019791666666666666, "calibration/unique_confidences": 7.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0013020833333333482, "completions/max_length": 3667.0, "completions/max_terminated_length": 3667.0, "completions/mean_length": 1022.9057495117188, "completions/mean_terminated_length": 1024.2632934570313, "completions/min_length": 0.0, "completions/min_terminated_length": 271.4, "epoch": 1.9711538461538463, "grad_norm": 0.0005470445030368865, "learning_rate": 1.3221153846153848e-06, "loss": -0.0004, "num_tokens": 2195936517.0, "reward": 1.323672842979431, "reward_std": 0.09248801916837693, "rewards/accuracy_reward": 0.7696180462837219, "rewards/brier_reward": 0.8791007280349732, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9986111044883728, "rewards/mean_confidence_reward": 0.7945225596427917, "sampling/batch_mean_priority_error": 0.048565969888739534, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.09166666666666666, "sampling/error_ema_max": 0.23887455463409424, "sampling/error_ema_mean": 0.011350595764815808, "sampling/priority_kl": 0.030000149831175806, "sampling/priority_scale": 0.8369162738090381, "sampling/prob_entropy": 10.278959846496582, "sampling/prob_max": 6.138679746072739e-05, "sampling/prob_min": 2.042250198428519e-05, "sampling/prompt_draws_max": 8.0, "sampling/prompt_draws_mean": 1.9632000207901001, "sampling/prompt_draws_total": 58896.0, "sampling/seen_fraction": 0.9355800032615662, "sampling/unseen_fraction": 0.06441999673843384, "signal/accuracy_reward/centered_abs_mean": 0.07965494841337203, "signal/accuracy_reward/group_std_mean": 0.1155205637216568, "signal/accuracy_reward/group_zero_std_frac": 0.6333333373069763, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.03982747420668602, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.03982747420668602, "signal/advantage_abs_mean": 0.06276309192180633, "signal/advantage_pre_scale_abs_mean": 0.06276309192180633, "signal/advantage_pre_scale_std": 0.14800895154476165, "signal/advantage_std": 0.14800895154476165, "signal/brier_reward/centered_abs_mean": 0.05834457501769066, "signal/brier_reward/group_std_mean": 0.08316372632980347, "signal/brier_reward/group_zero_std_frac": 0.4250000059604645, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02917228750884533, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.02917228750884533, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.0026692708022892477, "signal/format_reward/group_std_mean": 0.0072588598355650905, "signal/format_reward/group_zero_std_frac": 0.9611111044883728, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0013346354011446238, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0013346354011446238, "signal/mean_confidence_reward/centered_abs_mean": 0.042954657971858975, "signal/mean_confidence_reward/group_std_mean": 0.05850115194916725, "signal/mean_confidence_reward/group_zero_std_frac": 0.48055556416511536, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 4.295465771519957e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 4.295465771519957e-07, "step": 820 }, { "calibration/aurc": 0.15048849581866025, "calibration/batch_distribution_entropy": 0.401514669951322, "calibration/batch_entropy_100bins": 0.2161378706493462, "calibration/batch_entropy_10bins": 0.401514669951322, "calibration/batch_entropy_50bins": 0.25443400424696244, "calibration/batch_uniqueness": -0.13833550347222218, "calibration/confidence_entropy": 0.4026604023740436, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.3026041666666667, "calibration/coverage@15%": 0.6947916666666667, "calibration/coverage@20%": 0.7494791666666667, "calibration/coverage@25%": 0.7885416666666667, "calibration/coverage@30%": 0.8, "calibration/coverage@5%": 0.2635416666666667, "calibration/distribution_entropy_10": 0.401514669951322, "calibration/distribution_entropy_100": 0.2161378706493462, "calibration/ece": 0.12052083333333338, "calibration/mean_confidence": 0.8017708333333335, "calibration/unique_confidence_per_question": 0.020833333333333332, "calibration/unique_confidences": 8.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000868055555555558, "completions/max_length": 3711.8, "completions/max_terminated_length": 3711.8, "completions/mean_length": 1036.2858642578126, "completions/mean_terminated_length": 1037.209228515625, "completions/min_length": 118.2, "completions/min_terminated_length": 307.6, "epoch": 1.9831730769230769, "grad_norm": 0.0007763904868625104, "learning_rate": 1.292067307692308e-06, "loss": 0.0011, "num_tokens": 2210964898.0, "reward": 1.2921491622924806, "reward_std": 0.12069564908742905, "rewards/accuracy_reward": 0.7279513955116272, "rewards/brier_reward": 0.8572864413261414, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9990451335906982, "rewards/mean_confidence_reward": 0.7701562523841858, "sampling/batch_mean_priority_error": 0.0555428616098097, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.12777777777777777, "sampling/error_ema_max": 0.23887455463409424, "sampling/error_ema_mean": 0.011398331820964813, "sampling/priority_kl": 0.03000003807246685, "sampling/priority_scale": 0.8398089825874194, "sampling/prob_entropy": 10.27895736694336, "sampling/prob_max": 6.166202801978215e-05, "sampling/prob_min": 2.0436170962057078e-05, "sampling/prompt_draws_max": 8.0, "sampling/prompt_draws_mean": 1.9752000093460083, "sampling/prompt_draws_total": 59256.0, "sampling/seen_fraction": 0.9369800090789795, "sampling/unseen_fraction": 0.06301999092102051, "signal/accuracy_reward/centered_abs_mean": 0.11948784738779068, "signal/accuracy_reward/group_std_mean": 0.1649380922317505, "signal/accuracy_reward/group_zero_std_frac": 0.5055555701255798, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05974392369389534, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05974392369389534, "signal/advantage_abs_mean": 0.08622851222753525, "signal/advantage_pre_scale_abs_mean": 0.08622851222753525, "signal/advantage_pre_scale_std": 0.17191420793533324, "signal/advantage_std": 0.17191420793533324, "signal/brier_reward/centered_abs_mean": 0.0750839278101921, "signal/brier_reward/group_std_mean": 0.10442855954170227, "signal/brier_reward/group_zero_std_frac": 0.3444444537162781, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03754196390509605, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03754196390509605, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.0017740885028615593, "signal/format_reward/group_std_mean": 0.00407174676656723, "signal/format_reward/group_zero_std_frac": 0.9805555462837219, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0008870442514307797, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0008870442514307797, "signal/mean_confidence_reward/centered_abs_mean": 0.05379992350935936, "signal/mean_confidence_reward/group_std_mean": 0.07070956826210022, "signal/mean_confidence_reward/group_zero_std_frac": 0.4111111104488373, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.379991762310965e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.379991762310965e-07, "step": 825 }, { "calibration/aurc": 0.08829125696869047, "calibration/batch_distribution_entropy": 0.48584406488715803, "calibration/batch_entropy_100bins": 0.26703580379083747, "calibration/batch_entropy_10bins": 0.48584406488715803, "calibration/batch_entropy_50bins": 0.3143502276194673, "calibration/batch_uniqueness": 0.05997741525988515, "calibration/confidence_entropy": 0.4198865761959188, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.17552083333333332, "calibration/coverage@10%": 0.5196665578764142, "calibration/coverage@15%": 0.8503182114882506, "calibration/coverage@20%": 0.9415483572671889, "calibration/coverage@25%": 0.9634464751958225, "calibration/coverage@30%": 0.9765013054830287, "calibration/coverage@5%": 0.3269160683202785, "calibration/distribution_entropy_10": 0.48584406488715803, "calibration/distribution_entropy_100": 0.26703580379083747, "calibration/ece": 0.0851956864664926, "calibration/mean_confidence": 0.7715115045691908, "calibration/unique_confidence_per_question": 0.020833333333333332, "calibration/unique_confidences": 8.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0005208333333333482, "completions/max_length": 3806.4, "completions/max_terminated_length": 3806.4, "completions/mean_length": 1043.68623046875, "completions/mean_terminated_length": 1044.231591796875, "completions/min_length": 48.0, "completions/min_terminated_length": 267.8, "epoch": 1.9951923076923077, "grad_norm": 0.0005658183363266289, "learning_rate": 1.2620192307692309e-06, "loss": 0.0008, "num_tokens": 2226050883.0, "reward": 1.335673975944519, "reward_std": 0.09520231038331986, "rewards/accuracy_reward": 0.7828993082046509, "rewards/brier_reward": 0.8890407800674438, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9993923544883728, "rewards/mean_confidence_reward": 0.7782551884651184, "sampling/batch_mean_priority_error": 0.03993952312948028, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.11111111111111112, "sampling/error_ema_max": 0.23887455463409424, "sampling/error_ema_mean": 0.01144140474498272, "sampling/priority_kl": 0.029999879747629167, "sampling/priority_scale": 0.8430393754737452, "sampling/prob_entropy": 10.278953552246094, "sampling/prob_max": 6.195996684255078e-05, "sampling/prob_min": 2.0447872157092206e-05, "sampling/prompt_draws_max": 8.0, "sampling/prompt_draws_mean": 1.9871999740600585, "sampling/prompt_draws_total": 59616.0, "sampling/seen_fraction": 0.9384400010108948, "sampling/unseen_fraction": 0.061559998989105226, "signal/accuracy_reward/centered_abs_mean": 0.09328884482383729, "signal/accuracy_reward/group_std_mean": 0.13105592578649522, "signal/accuracy_reward/group_zero_std_frac": 0.5944444537162781, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04664442241191864, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04664442241191864, "signal/advantage_abs_mean": 0.06662840321660042, "signal/advantage_pre_scale_abs_mean": 0.06662840321660042, "signal/advantage_pre_scale_std": 0.1486303061246872, "signal/advantage_std": 0.1486303061246872, "signal/brier_reward/centered_abs_mean": 0.05784967392683029, "signal/brier_reward/group_std_mean": 0.08218971043825149, "signal/brier_reward/group_zero_std_frac": 0.41111111640930176, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.028924836963415145, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.028924836963415145, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.001177300326526165, "signal/format_reward/group_std_mean": 0.0034373244270682335, "signal/format_reward/group_zero_std_frac": 0.9805555462837219, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0005886501632630825, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0005886501632630825, "signal/mean_confidence_reward/centered_abs_mean": 0.04647516682744026, "signal/mean_confidence_reward/group_std_mean": 0.061248787492513654, "signal/mean_confidence_reward/group_zero_std_frac": 0.46111111640930175, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 4.647516277600516e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 4.647516277600516e-07, "step": 830 }, { "calibration/aurc": 0.07549365578168557, "calibration/batch_distribution_entropy": 0.5254938552647503, "calibration/batch_entropy_100bins": 0.2960007588278585, "calibration/batch_entropy_10bins": 0.5254938552647503, "calibration/batch_entropy_50bins": 0.34844730404000235, "calibration/batch_uniqueness": 0.1832330232638631, "calibration/confidence_entropy": 0.43629637896002427, "calibration/coverage@0%": 0.10885416666666667, "calibration/coverage@1%": 0.11458333333333333, "calibration/coverage@10%": 0.7351716958601454, "calibration/coverage@15%": 0.8265016909603009, "calibration/coverage@20%": 0.8688489605819507, "calibration/coverage@25%": 0.9425647965879265, "calibration/coverage@30%": 0.9734375, "calibration/coverage@5%": 0.5424786745406823, "calibration/distribution_entropy_10": 0.5254938552647503, "calibration/distribution_entropy_100": 0.2960007588278585, "calibration/ece": 0.10865095482720322, "calibration/mean_confidence": 0.7365200278999885, "calibration/unique_confidence_per_question": 0.020833333333333332, "calibration/unique_confidences": 8.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000694444444444442, "completions/max_length": 3274.4, "completions/max_terminated_length": 3274.4, "completions/mean_length": 973.68837890625, "completions/mean_terminated_length": 974.4301025390625, "completions/min_length": 162.8, "completions/min_terminated_length": 360.8, "epoch": 2.0072115384615383, "grad_norm": 0.0005269041866995394, "learning_rate": 1.231971153846154e-06, "loss": 0.0003, "num_tokens": 2241097670.0, "reward": 1.3262059450149537, "reward_std": 0.09740138053894043, "rewards/accuracy_reward": 0.7748263955116272, "rewards/brier_reward": 0.8787855863571167, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9987847089767456, "rewards/mean_confidence_reward": 0.7612066030502319, "sampling/batch_mean_priority_error": 0.043829235399801214, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.09722222222222224, "sampling/error_ema_max": 0.23887455463409424, "sampling/error_ema_mean": 0.011479710228741169, "sampling/priority_kl": 0.029999543726444245, "sampling/priority_scale": 0.8459879100089893, "sampling/prob_entropy": 10.278957176208497, "sampling/prob_max": 6.224302342161536e-05, "sampling/prob_min": 2.0461800886550918e-05, "sampling/prompt_draws_max": 8.0, "sampling/prompt_draws_mean": 1.9991999864578247, "sampling/prompt_draws_total": 59976.0, "sampling/seen_fraction": 0.9398066639900208, "sampling/unseen_fraction": 0.06019333600997925, "signal/accuracy_reward/centered_abs_mean": 0.10342881977558135, "signal/accuracy_reward/group_std_mean": 0.14174398630857468, "signal/accuracy_reward/group_zero_std_frac": 0.5777777791023254, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05171440988779068, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05171440988779068, "signal/advantage_abs_mean": 0.06915109902620316, "signal/advantage_pre_scale_abs_mean": 0.06915109902620316, "signal/advantage_pre_scale_std": 0.14984942972660065, "signal/advantage_std": 0.14984942972660065, "signal/brier_reward/centered_abs_mean": 0.0605738990008831, "signal/brier_reward/group_std_mean": 0.08339389562606811, "signal/brier_reward/group_zero_std_frac": 0.3916666626930237, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03028694950044155, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03028694950044155, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.002267795195803046, "signal/format_reward/group_std_mean": 0.0049757368862628935, "signal/format_reward/group_zero_std_frac": 0.9777777671813965, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.001133897597901523, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.001133897597901523, "signal/mean_confidence_reward/centered_abs_mean": 0.04679037928581238, "signal/mean_confidence_reward/group_std_mean": 0.06196520924568176, "signal/mean_confidence_reward/group_zero_std_frac": 0.43055555820465086, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 4.679037715504819e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 4.679037715504819e-07, "step": 835 }, { "calibration/aurc": 0.147330218464356, "calibration/batch_distribution_entropy": 0.5416599200734422, "calibration/batch_entropy_100bins": 0.2980373510942677, "calibration/batch_entropy_10bins": 0.5416599200734422, "calibration/batch_entropy_50bins": 0.35084474750423256, "calibration/batch_uniqueness": 0.22830577188595483, "calibration/confidence_entropy": 0.44884432769273436, "calibration/coverage@0%": 0.11770833333333333, "calibration/coverage@1%": 0.12291666666666667, "calibration/coverage@10%": 0.31079607267188863, "calibration/coverage@15%": 0.3739365752828546, "calibration/coverage@20%": 0.8098958333333333, "calibration/coverage@25%": 0.8765625, "calibration/coverage@30%": 0.921875, "calibration/coverage@5%": 0.2774409812880766, "calibration/distribution_entropy_10": 0.5416599200734422, "calibration/distribution_entropy_100": 0.2980373510942677, "calibration/ece": 0.11948107049608356, "calibration/mean_confidence": 0.7425214860748478, "calibration/unique_confidence_per_question": 0.020833333333333332, "calibration/unique_confidences": 8.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.001128472222222232, "completions/max_length": 3801.6, "completions/max_terminated_length": 3801.6, "completions/mean_length": 1044.3095581054688, "completions/mean_terminated_length": 1045.5166015625, "completions/min_length": 86.6, "completions/min_terminated_length": 236.6, "epoch": 2.019230769230769, "grad_norm": 0.0006535506690852344, "learning_rate": 1.201923076923077e-06, "loss": 0.0007, "num_tokens": 2256206260.0, "reward": 1.3112290143966674, "reward_std": 0.09778019338846207, "rewards/accuracy_reward": 0.7542534708976746, "rewards/brier_reward": 0.8694054126739502, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9987847208976746, "rewards/mean_confidence_reward": 0.729296863079071, "sampling/batch_mean_priority_error": 0.04321197037989741, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.09166666666666667, "sampling/error_ema_max": 0.23887455463409424, "sampling/error_ema_mean": 0.011516995541751385, "sampling/priority_kl": 0.029999730363488197, "sampling/priority_scale": 0.8477530895499512, "sampling/prob_entropy": 10.278942108154297, "sampling/prob_max": 6.246392877073959e-05, "sampling/prob_min": 2.0486806897679342e-05, "sampling/prompt_draws_max": 8.0, "sampling/prompt_draws_mean": 2.0112000465393067, "sampling/prompt_draws_total": 60336.0, "sampling/seen_fraction": 0.9407866597175598, "sampling/unseen_fraction": 0.059213340282440186, "signal/accuracy_reward/centered_abs_mean": 0.10748155415058136, "signal/accuracy_reward/group_std_mean": 0.14893923699855804, "signal/accuracy_reward/group_zero_std_frac": 0.5416666865348816, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05374077707529068, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05374077707529068, "signal/advantage_abs_mean": 0.06958878040313721, "signal/advantage_pre_scale_abs_mean": 0.06958878040313721, "signal/advantage_pre_scale_std": 0.14752740263938904, "signal/advantage_std": 0.14752740263938904, "signal/brier_reward/centered_abs_mean": 0.06264480128884316, "signal/brier_reward/group_std_mean": 0.08568576574325562, "signal/brier_reward/group_zero_std_frac": 0.325, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03132240064442158, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03132240064442158, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.002289496548473835, "signal/format_reward/group_std_mean": 0.005327311344444752, "signal/format_reward/group_zero_std_frac": 0.9749999880790711, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0011447482742369176, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0011447482742369176, "signal/mean_confidence_reward/centered_abs_mean": 0.0524658277630806, "signal/mean_confidence_reward/group_std_mean": 0.06992786824703216, "signal/mean_confidence_reward/group_zero_std_frac": 0.35555556416511536, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.246582588824821e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.246582588824821e-07, "step": 840 } ], "logging_steps": 5, "max_steps": 1040, "num_input_tokens_seen": 2256206260, "num_train_epochs": 3, "save_steps": 60, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 6, "trial_name": null, "trial_params": null }