{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9984, "eval_steps": 50, "global_step": 312, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "calibration/aurc": 0.625441469460826, "calibration/batch_distribution_entropy": 0.6561097488908417, "calibration/confidence_entropy": 0.34487710695615964, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.49470016071002154, "calibration/mean_confidence": 0.7911784320650452, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03603515625, "completions/max_length": 1509.6, "completions/max_terminated_length": 1509.6, "completions/mean_length": 214.22236328125, "completions/mean_terminated_length": 222.21807250976562, "completions/min_length": 0.0, "completions/min_terminated_length": 1.8, "epoch": 0.016, "grad_norm": 0.025879332795739174, "learning_rate": 3.1249999999999997e-07, "loss": 0.0125, "num_tokens": 17037669.0, "reward": 0.5616797089576722, "reward_std": 0.4342596590518951, "rewards/accuracy_reward": 0.22119140625, "rewards/batch_coverage_0": 0.05096393823623657, "rewards/batch_coverage_1": 0.05096393823623657, "rewards/batch_coverage_10": 0.07000155597925187, "rewards/batch_coverage_15": 0.0811154417693615, "rewards/batch_coverage_20": 0.09202761203050613, "rewards/batch_coverage_25": 0.10896564424037933, "rewards/batch_coverage_5": 0.062107541412115094, "rewards/brier_reward": 0.37876845598220826, "rewards/confidence_uniqueness_reward": 0.4908894419670105, "rewards/format_reward": 0.68837890625, "rewards/frontier_aurc_reward": 0.30465003848075867, "rewards/frontier_ece_reward": 0.30465003848075867, "rewards/frontier_entropy_batch_reward": -0.6595894694328308, "signal/accuracy_reward/centered_abs_mean": 0.242242431640625, "signal/accuracy_reward/group_std_mean": 0.284113472700119, "signal/accuracy_reward/group_zero_std_frac": 0.3125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1211212158203125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.1211212158203125, "signal/advantage_abs_mean": 0.3647449553012848, "signal/advantage_pre_scale_abs_mean": 0.3647449553012848, "signal/advantage_pre_scale_std": 0.4435017466545105, "signal/advantage_std": 0.4435017466545105, "signal/batch_coverage_0/centered_abs_mean": 0.07645872831344605, "signal/batch_coverage_0/group_std_mean": 0.1278139054775238, "signal/batch_coverage_0/group_zero_std_frac": 0.003125, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.007645872887223959, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.007645872887223959, "signal/batch_coverage_1/centered_abs_mean": 0.07645872831344605, "signal/batch_coverage_1/group_std_mean": 0.1278139054775238, "signal/batch_coverage_1/group_zero_std_frac": 0.003125, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.007645872887223959, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.007645872887223959, "signal/batch_coverage_10/centered_abs_mean": 0.08913887441158294, "signal/batch_coverage_10/group_std_mean": 0.1408896714448929, "signal/batch_coverage_10/group_zero_std_frac": 0.003125, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.008913887571543455, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.008913887571543455, "signal/batch_coverage_15/centered_abs_mean": 0.09963576942682266, "signal/batch_coverage_15/group_std_mean": 0.15215918719768523, "signal/batch_coverage_15/group_zero_std_frac": 0.003125, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.00996357724070549, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.00996357724070549, "signal/batch_coverage_20/centered_abs_mean": 0.11348118036985397, "signal/batch_coverage_20/group_std_mean": 0.16662466824054717, "signal/batch_coverage_20/group_zero_std_frac": 0.003125, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.011348118260502815, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.011348118260502815, "signal/batch_coverage_25/centered_abs_mean": 0.13774538338184356, "signal/batch_coverage_25/group_std_mean": 0.19362023174762727, "signal/batch_coverage_25/group_zero_std_frac": 0.003125, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.013774538971483707, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.013774538971483707, "signal/batch_coverage_5/centered_abs_mean": 0.08416972756385803, "signal/batch_coverage_5/group_std_mean": 0.1365703135728836, "signal/batch_coverage_5/group_zero_std_frac": 0.003125, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.008416973147541284, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.008416973147541284, "signal/brier_reward/centered_abs_mean": 0.32081450819969176, "signal/brier_reward/group_std_mean": 0.3656565546989441, "signal/brier_reward/group_zero_std_frac": 0.003125, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.032081450521945956, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.032081450521945956, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.2961998999118805, "signal/confidence_uniqueness_reward/group_std_mean": 0.34802345037460325, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.029619990289211272, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.029619990289211272, "signal/format_reward/centered_abs_mean": 0.400396728515625, "signal/format_reward/group_std_mean": 0.4517782092094421, "signal/format_reward/group_zero_std_frac": 0.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.2001983642578125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.2001983642578125, "signal/frontier_aurc_reward/centered_abs_mean": 0.29386022686958313, "signal/frontier_aurc_reward/group_std_mean": 0.3446810841560364, "signal/frontier_aurc_reward/group_zero_std_frac": 0.003125, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0036732527427375317, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0036732527427375317, "signal/frontier_ece_reward/centered_abs_mean": 0.29386022686958313, "signal/frontier_ece_reward/group_std_mean": 0.3446810841560364, "signal/frontier_ece_reward/group_zero_std_frac": 0.003125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.029386021941900254, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.029386021941900254, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.42292317748069763, "signal/frontier_entropy_batch_reward/group_std_mean": 0.47080921530723574, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.04229231923818588, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.04229231923818588, "step": 5 }, { "calibration/aurc": 0.6826533198028557, "calibration/batch_distribution_entropy": 0.6420826904038014, "calibration/confidence_entropy": 0.3419187196643387, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.5423836619103499, "calibration/mean_confidence": 0.7939850831364194, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0357421875, "completions/max_length": 1513.4, "completions/max_terminated_length": 1513.4, "completions/mean_length": 203.18544921875, "completions/mean_terminated_length": 210.78201904296876, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.032, "grad_norm": 0.018445724621415138, "learning_rate": 6.249999999999999e-07, "loss": 0.0027, "num_tokens": 34218640.0, "reward": 0.5817601561546326, "reward_std": 0.4154947519302368, "rewards/accuracy_reward": 0.210546875, "rewards/batch_coverage_0": 0.056951449066400525, "rewards/batch_coverage_1": 0.056951449066400525, "rewards/batch_coverage_10": 0.07880671173334122, "rewards/batch_coverage_15": 0.096053147315979, "rewards/batch_coverage_20": 0.10706809908151627, "rewards/batch_coverage_25": 0.1138889878988266, "rewards/batch_coverage_5": 0.06925816759467125, "rewards/brier_reward": 0.3806018054485321, "rewards/confidence_uniqueness_reward": 0.5155683040618897, "rewards/format_reward": 0.7294921875, "rewards/frontier_aurc_reward": 0.3003185033798218, "rewards/frontier_ece_reward": 0.3003185033798218, "rewards/frontier_entropy_batch_reward": -0.6956000924110413, "signal/accuracy_reward/centered_abs_mean": 0.22080078125, "signal/accuracy_reward/group_std_mean": 0.2653929114341736, "signal/accuracy_reward/group_zero_std_frac": 0.34375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.110400390625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.110400390625, "signal/advantage_abs_mean": 0.33845388889312744, "signal/advantage_pre_scale_abs_mean": 0.33845388889312744, "signal/advantage_pre_scale_std": 0.4272084295749664, "signal/advantage_std": 0.4272084295749664, "signal/batch_coverage_0/centered_abs_mean": 0.0791116937994957, "signal/batch_coverage_0/group_std_mean": 0.12949045449495317, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.007911169622093438, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.007911169622093438, "signal/batch_coverage_1/centered_abs_mean": 0.0791116937994957, "signal/batch_coverage_1/group_std_mean": 0.12949045449495317, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.007911169622093438, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.007911169622093438, "signal/batch_coverage_10/centered_abs_mean": 0.09277822971343994, "signal/batch_coverage_10/group_std_mean": 0.14479392766952515, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.009277822449803352, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.009277822449803352, "signal/batch_coverage_15/centered_abs_mean": 0.10906872600317001, "signal/batch_coverage_15/group_std_mean": 0.1635884314775467, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.010906872153282166, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.010906872153282166, "signal/batch_coverage_20/centered_abs_mean": 0.1215769112110138, "signal/batch_coverage_20/group_std_mean": 0.17692172527313232, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.012157691456377507, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.012157691456377507, "signal/batch_coverage_25/centered_abs_mean": 0.13173424154520036, "signal/batch_coverage_25/group_std_mean": 0.1880470871925354, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.013173424638807774, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.013173424638807774, "signal/batch_coverage_5/centered_abs_mean": 0.08721371293067932, "signal/batch_coverage_5/group_std_mean": 0.13860590159893035, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.008721371926367284, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.008721371926367284, "signal/brier_reward/centered_abs_mean": 0.305008465051651, "signal/brier_reward/group_std_mean": 0.35352975130081177, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03050084561109543, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.03050084561109543, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.2797845423221588, "signal/confidence_uniqueness_reward/group_std_mean": 0.33646575808525087, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.027978454902768134, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.027978454902768134, "signal/format_reward/centered_abs_mean": 0.368994140625, "signal/format_reward/group_std_mean": 0.432600337266922, "signal/format_reward/group_zero_std_frac": 0.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.1844970703125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.1844970703125, "signal/frontier_aurc_reward/centered_abs_mean": 0.27742584943771365, "signal/frontier_aurc_reward/group_std_mean": 0.33283730745315554, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.003467823192477226, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.003467823192477226, "signal/frontier_ece_reward/centered_abs_mean": 0.27742584943771365, "signal/frontier_ece_reward/group_std_mean": 0.33283730745315554, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.02774258553981781, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.02774258553981781, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3977131128311157, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4558208167552948, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03977131098508835, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03977131098508835, "step": 10 }, { "calibration/aurc": 0.5891158731428874, "calibration/batch_distribution_entropy": 0.6520017602661853, "calibration/buffer_distribution_entropy": 0.6638543025012997, "calibration/confidence_entropy": 0.33411021460596607, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.46858668054755875, "calibration/mean_confidence": 0.7970036765667995, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0201171875, "completions/max_length": 1450.6, "completions/max_terminated_length": 1450.6, "completions/mean_length": 174.7373046875, "completions/mean_terminated_length": 178.42660827636718, "completions/min_length": 0.0, "completions/min_terminated_length": 3.6, "epoch": 0.048, "grad_norm": 0.04330306500196457, "learning_rate": 9.374999999999999e-07, "loss": 0.0076, "num_tokens": 51056686.0, "reward": 0.7281944155693054, "reward_std": 0.36009618639945984, "rewards/accuracy_reward": 0.271484375, "rewards/batch_coverage_0": 0.09881696254014968, "rewards/batch_coverage_1": 0.09881696254014968, "rewards/batch_coverage_10": 0.13585815876722335, "rewards/batch_coverage_15": 0.15109447687864302, "rewards/batch_coverage_20": 0.1667906880378723, "rewards/batch_coverage_25": 0.18214811384677887, "rewards/batch_coverage_5": 0.11958295553922653, "rewards/brier_reward": 0.48470067977905273, "rewards/confidence_uniqueness_reward": 0.6335558533668518, "rewards/format_reward": 0.87490234375, "rewards/frontier_aurc_reward": 0.28310426576063036, "rewards/frontier_ece_reward": 0.27106482833623885, "rewards/frontier_entropy_batch_reward": -0.8278069615364074, "signal/accuracy_reward/centered_abs_mean": 0.19892578125, "signal/accuracy_reward/group_std_mean": 0.24604250788688659, "signal/accuracy_reward/group_zero_std_frac": 0.359375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.099462890625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.099462890625, "signal/advantage_abs_mean": 0.27411369383335116, "signal/advantage_pre_scale_abs_mean": 0.27411369383335116, "signal/advantage_pre_scale_std": 0.37025115489959715, "signal/advantage_std": 0.37025115489959715, "signal/batch_coverage_0/centered_abs_mean": 0.10894110947847366, "signal/batch_coverage_0/group_std_mean": 0.1668031245470047, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.010894111171364785, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.010894111171364785, "signal/batch_coverage_1/centered_abs_mean": 0.10894110947847366, "signal/batch_coverage_1/group_std_mean": 0.1668031245470047, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.010894111171364785, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.010894111171364785, "signal/batch_coverage_10/centered_abs_mean": 0.1243826374411583, "signal/batch_coverage_10/group_std_mean": 0.18446942567825317, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.012438264302909374, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.012438264302909374, "signal/batch_coverage_15/centered_abs_mean": 0.13416066765785217, "signal/batch_coverage_15/group_std_mean": 0.19630922377109528, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.013416066579520703, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.013416066579520703, "signal/batch_coverage_20/centered_abs_mean": 0.14895718097686766, "signal/batch_coverage_20/group_std_mean": 0.2133055567741394, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.01489571835845709, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.01489571835845709, "signal/batch_coverage_25/centered_abs_mean": 0.16923868358135224, "signal/batch_coverage_25/group_std_mean": 0.23628869652748108, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.016923869028687477, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.016923869028687477, "signal/batch_coverage_5/centered_abs_mean": 0.11639369875192643, "signal/batch_coverage_5/group_std_mean": 0.17475835084915162, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.011639369651675224, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.011639369651675224, "signal/brier_reward/centered_abs_mean": 0.2798484146595001, "signal/brier_reward/group_std_mean": 0.33471320271492006, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02798484228551388, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.02798484228551388, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.21195151209831237, "signal/confidence_uniqueness_reward/group_std_mean": 0.27575126886367796, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.021195151284337042, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.021195151284337042, "signal/format_reward/centered_abs_mean": 0.204010009765625, "signal/format_reward/group_std_mean": 0.30632642805576327, "signal/format_reward/group_zero_std_frac": 0.053125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.1020050048828125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.1020050048828125, "signal/frontier_aurc_reward/centered_abs_mean": 0.2093428259715438, "signal/frontier_aurc_reward/group_std_mean": 0.25139847891405226, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0026167853160586675, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0026167853160586675, "signal/frontier_ece_reward/centered_abs_mean": 0.2434532254934311, "signal/frontier_ece_reward/group_std_mean": 0.29156210720539094, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.024345323070883752, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.024345323070883752, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2686063975095749, "signal/frontier_entropy_batch_reward/group_std_mean": 0.37301525473594666, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.009375, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026860639825463294, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026860639825463294, "step": 15 }, { "calibration/aurc": 0.522754084713956, "calibration/batch_distribution_entropy": 0.738960728697382, "calibration/buffer_distribution_entropy": 0.6672639500396118, "calibration/confidence_entropy": 0.3410296290421736, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.35869172803053007, "calibration/mean_confidence": 0.7331278499437659, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00751953125, "completions/max_length": 1419.2, "completions/max_terminated_length": 1419.2, "completions/mean_length": 133.2443359375, "completions/mean_terminated_length": 134.2860855102539, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.064, "grad_norm": 0.01446569710969925, "learning_rate": 1e-06, "loss": -0.0051, "num_tokens": 67339508.0, "reward": 0.8069659948349, "reward_std": 0.29989936351776125, "rewards/accuracy_reward": 0.32421875, "rewards/batch_coverage_0": 0.12223946899175644, "rewards/batch_coverage_1": 0.12223946899175644, "rewards/batch_coverage_10": 0.18216916620731355, "rewards/batch_coverage_15": 0.19795053601264953, "rewards/batch_coverage_20": 0.2243308424949646, "rewards/batch_coverage_25": 0.23708344697952272, "rewards/batch_coverage_5": 0.14572731405496597, "rewards/brier_reward": 0.5832386374473572, "rewards/confidence_uniqueness_reward": 0.7324810743331909, "rewards/format_reward": 0.96865234375, "rewards/frontier_aurc_reward": -0.006812120229005814, "rewards/frontier_ece_reward": -0.04467292013578117, "rewards/frontier_entropy_batch_reward": -0.8966308832168579, "signal/accuracy_reward/centered_abs_mean": 0.20263671875, "signal/accuracy_reward/group_std_mean": 0.2531333029270172, "signal/accuracy_reward/group_zero_std_frac": 0.34375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.101318359375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.101318359375, "signal/advantage_abs_mean": 0.2353407621383667, "signal/advantage_pre_scale_abs_mean": 0.2353407621383667, "signal/advantage_pre_scale_std": 0.3037436902523041, "signal/advantage_std": 0.3037436902523041, "signal/batch_coverage_0/centered_abs_mean": 0.16245611310005187, "signal/batch_coverage_0/group_std_mean": 0.23634712100028993, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.01624561119824648, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.01624561119824648, "signal/batch_coverage_1/centered_abs_mean": 0.16245611310005187, "signal/batch_coverage_1/group_std_mean": 0.23634712100028993, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.01624561119824648, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.01624561119824648, "signal/batch_coverage_10/centered_abs_mean": 0.18128455579280853, "signal/batch_coverage_10/group_std_mean": 0.2564110219478607, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.018128455430269242, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.018128455430269242, "signal/batch_coverage_15/centered_abs_mean": 0.18811092674732208, "signal/batch_coverage_15/group_std_mean": 0.26361055076122286, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.018811094015836714, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.018811094015836714, "signal/batch_coverage_20/centered_abs_mean": 0.20965143740177156, "signal/batch_coverage_20/group_std_mean": 0.2887492120265961, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.020965144410729407, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.020965144410729407, "signal/batch_coverage_25/centered_abs_mean": 0.22187889814376832, "signal/batch_coverage_25/group_std_mean": 0.30295662879943847, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.022187890857458113, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.022187890857458113, "signal/batch_coverage_5/centered_abs_mean": 0.1677585393190384, "signal/batch_coverage_5/group_std_mean": 0.24008685052394868, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.01677585393190384, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.01677585393190384, "signal/brier_reward/centered_abs_mean": 0.267183381319046, "signal/brier_reward/group_std_mean": 0.32429797053337095, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.026718338951468468, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.026718338951468468, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.15027010142803193, "signal/confidence_uniqueness_reward/group_std_mean": 0.19157288372516632, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.015027010440826416, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.015027010440826416, "signal/format_reward/centered_abs_mean": 0.053790283203125, "signal/format_reward/group_std_mean": 0.11199843138456345, "signal/format_reward/group_zero_std_frac": 0.496875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0268951416015625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0268951416015625, "signal/frontier_aurc_reward/centered_abs_mean": 0.004937331937253475, "signal/frontier_aurc_reward/group_std_mean": 0.006630830001085997, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.171664863359183e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.171664863359183e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.14534821808338166, "signal/frontier_ece_reward/group_std_mean": 0.17288282215595246, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.01453482247889042, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.01453482247889042, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.17549372911453248, "signal/frontier_entropy_batch_reward/group_std_mean": 0.30022566914558413, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.06875, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01754937395453453, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01754937395453453, "step": 20 }, { "calibration/aurc": 0.6308852649276485, "calibration/batch_distribution_entropy": 0.7947961294672216, "calibration/buffer_distribution_entropy": 0.7157307539372637, "calibration/confidence_entropy": 0.28688856649436556, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.2848839505115735, "calibration/mean_confidence": 0.4834632274141229, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0044921875, "completions/max_length": 765.8, "completions/max_terminated_length": 765.8, "completions/mean_length": 108.28251953125, "completions/mean_terminated_length": 108.77434997558593, "completions/min_length": 0.0, "completions/min_terminated_length": 13.4, "epoch": 0.08, "grad_norm": 0.03482503816485405, "learning_rate": 1e-06, "loss": -0.004, "num_tokens": 83381473.0, "reward": 0.9376622438430786, "reward_std": 0.31385440230369566, "rewards/accuracy_reward": 0.32197265625, "rewards/batch_coverage_0": 0.2897476017475128, "rewards/batch_coverage_1": 0.2897476017475128, "rewards/batch_coverage_10": 0.3420256972312927, "rewards/batch_coverage_15": 0.3555846869945526, "rewards/batch_coverage_20": 0.3710071802139282, "rewards/batch_coverage_25": 0.37447254061698915, "rewards/batch_coverage_5": 0.3116483360528946, "rewards/brier_reward": 0.6967913031578064, "rewards/confidence_uniqueness_reward": 0.6951715230941773, "rewards/format_reward": 0.9861328125, "rewards/frontier_aurc_reward": -0.005871403589844703, "rewards/frontier_ece_reward": -0.014616276603192091, "rewards/frontier_entropy_batch_reward": -0.8747512936592102, "signal/accuracy_reward/centered_abs_mean": 0.191436767578125, "signal/accuracy_reward/group_std_mean": 0.2405393362045288, "signal/accuracy_reward/group_zero_std_frac": 0.365625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0957183837890625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0957183837890625, "signal/advantage_abs_mean": 0.26043112874031066, "signal/advantage_pre_scale_abs_mean": 0.26043112874031066, "signal/advantage_pre_scale_std": 0.3133840084075928, "signal/advantage_std": 0.3133840084075928, "signal/batch_coverage_0/centered_abs_mean": 0.30262792110443115, "signal/batch_coverage_0/group_std_mean": 0.3807533621788025, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.030262791365385056, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.030262791365385056, "signal/batch_coverage_1/centered_abs_mean": 0.30262792110443115, "signal/batch_coverage_1/group_std_mean": 0.3807533621788025, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.030262791365385056, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.030262791365385056, "signal/batch_coverage_10/centered_abs_mean": 0.317511785030365, "signal/batch_coverage_10/group_std_mean": 0.39243263006210327, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.031751178577542304, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.031751178577542304, "signal/batch_coverage_15/centered_abs_mean": 0.3235657215118408, "signal/batch_coverage_15/group_std_mean": 0.39802160263061526, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.032356572523713115, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.032356572523713115, "signal/batch_coverage_20/centered_abs_mean": 0.333120733499527, "signal/batch_coverage_20/group_std_mean": 0.4084656834602356, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.033312073722481725, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.033312073722481725, "signal/batch_coverage_25/centered_abs_mean": 0.3283375442028046, "signal/batch_coverage_25/group_std_mean": 0.4016383528709412, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.032833756506443025, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.032833756506443025, "signal/batch_coverage_5/centered_abs_mean": 0.30745909810066224, "signal/batch_coverage_5/group_std_mean": 0.38337884545326234, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.030745909363031388, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.030745909363031388, "signal/brier_reward/centered_abs_mean": 0.2754803538322449, "signal/brier_reward/group_std_mean": 0.33621970415115354, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.027548035979270934, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.027548035979270934, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.19807184338569642, "signal/confidence_uniqueness_reward/group_std_mean": 0.22963170409202577, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.01980718448758125, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.01980718448758125, "signal/format_reward/centered_abs_mean": 0.0231201171875, "signal/format_reward/group_std_mean": 0.051596745103597644, "signal/format_reward/group_zero_std_frac": 0.75, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01156005859375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01156005859375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0036453432869166134, "signal/frontier_aurc_reward/group_std_mean": 0.005177795048803091, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.5566792687168344e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.5566792687168344e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.11404286623001099, "signal/frontier_ece_reward/group_std_mean": 0.14420087337493898, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.01140428688377142, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.01140428688377142, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.21047287285327912, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3506768882274628, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.025, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.021047287806868552, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.021047287806868552, "step": 25 }, { "calibration/aurc": 0.7548298680504103, "calibration/batch_distribution_entropy": 0.4567463163976795, "calibration/buffer_distribution_entropy": 0.7713618183929173, "calibration/confidence_entropy": 0.12036518782927541, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.030528375733855185, "calibration/coverage@30%": 0.04031311154598825, "calibration/coverage@5%": 0.0, "calibration/ece": 0.14627281235381873, "calibration/mean_confidence": 0.15631772699737523, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0021484375, "completions/max_length": 1229.4, "completions/max_terminated_length": 1229.4, "completions/mean_length": 113.3099609375, "completions/mean_terminated_length": 113.5542236328125, "completions/min_length": 0.0, "completions/min_terminated_length": 17.4, "epoch": 0.096, "grad_norm": 0.004159490577876568, "learning_rate": 1e-06, "loss": -0.0045, "num_tokens": 99586375.0, "reward": 0.9887324690818786, "reward_std": 0.2425031691789627, "rewards/accuracy_reward": 0.21640625, "rewards/batch_coverage_0": 0.5490703642368316, "rewards/batch_coverage_1": 0.5490703642368316, "rewards/batch_coverage_10": 0.5690686941146851, "rewards/batch_coverage_15": 0.5694445013999939, "rewards/batch_coverage_20": 0.5644742488861084, "rewards/batch_coverage_25": 0.5636660337448121, "rewards/batch_coverage_5": 0.557263171672821, "rewards/brier_reward": 0.8261630177497864, "rewards/confidence_uniqueness_reward": -0.055358816683292386, "rewards/format_reward": 0.993359375, "rewards/frontier_aurc_reward": -0.0052281500771641735, "rewards/frontier_ece_reward": 0.006546266423538327, "rewards/frontier_entropy_batch_reward": -0.8602579355239868, "signal/accuracy_reward/centered_abs_mean": 0.1811279296875, "signal/accuracy_reward/group_std_mean": 0.2211446762084961, "signal/accuracy_reward/group_zero_std_frac": 0.4375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09056396484375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09056396484375, "signal/advantage_abs_mean": 0.1823619097471237, "signal/advantage_pre_scale_abs_mean": 0.1823619097471237, "signal/advantage_pre_scale_std": 0.2679218739271164, "signal/advantage_std": 0.2679218739271164, "signal/batch_coverage_0/centered_abs_mean": 0.2666642487049103, "signal/batch_coverage_0/group_std_mean": 0.34697710871696474, "signal/batch_coverage_0/group_zero_std_frac": 0.059375, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.026666425913572312, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.026666425913572312, "signal/batch_coverage_1/centered_abs_mean": 0.2666642487049103, "signal/batch_coverage_1/group_std_mean": 0.34697710871696474, "signal/batch_coverage_1/group_zero_std_frac": 0.059375, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.026666425913572312, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.026666425913572312, "signal/batch_coverage_10/centered_abs_mean": 0.26514615416526793, "signal/batch_coverage_10/group_std_mean": 0.3440410554409027, "signal/batch_coverage_10/group_zero_std_frac": 0.059375, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.02651461660861969, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.02651461660861969, "signal/batch_coverage_15/centered_abs_mean": 0.2547744870185852, "signal/batch_coverage_15/group_std_mean": 0.33346186876296996, "signal/batch_coverage_15/group_zero_std_frac": 0.059375, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.025477449968457223, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.025477449968457223, "signal/batch_coverage_20/centered_abs_mean": 0.24541459679603578, "signal/batch_coverage_20/group_std_mean": 0.3252484619617462, "signal/batch_coverage_20/group_zero_std_frac": 0.059375, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.024541460536420347, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.024541460536420347, "signal/batch_coverage_25/centered_abs_mean": 0.24044472873210906, "signal/batch_coverage_25/group_std_mean": 0.32090034484863283, "signal/batch_coverage_25/group_zero_std_frac": 0.059375, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.024044474214315416, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.024044474214315416, "signal/batch_coverage_5/centered_abs_mean": 0.2699785053730011, "signal/batch_coverage_5/group_std_mean": 0.35016059279441836, "signal/batch_coverage_5/group_zero_std_frac": 0.059375, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.026997851207852364, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.026997851207852364, "signal/brier_reward/centered_abs_mean": 0.20681797564029694, "signal/brier_reward/group_std_mean": 0.27285282015800477, "signal/brier_reward/group_zero_std_frac": 0.053125, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020681798458099365, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.020681798458099365, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.37839528918266296, "signal/confidence_uniqueness_reward/group_std_mean": 0.47339463233947754, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.065625, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03783952966332436, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.03783952966332436, "signal/format_reward/centered_abs_mean": 0.01219482421875, "signal/format_reward/group_std_mean": 0.031007519736886025, "signal/format_reward/group_zero_std_frac": 0.8375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.006097412109375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.006097412109375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0016961080371402204, "signal/frontier_aurc_reward/group_std_mean": 0.0027939900755882263, "signal/frontier_aurc_reward/group_zero_std_frac": 0.003125, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.1201351046329364e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.1201351046329364e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.04961966909468174, "signal/frontier_ece_reward/group_std_mean": 0.07686802893877029, "signal/frontier_ece_reward/group_zero_std_frac": 0.053125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0049619670957326886, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0049619670957326886, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.22081681489944457, "signal/frontier_entropy_batch_reward/group_std_mean": 0.33247880935668944, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.125, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.022081680968403815, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.022081680968403815, "step": 30 }, { "calibration/aurc": 0.7739565546771938, "calibration/batch_distribution_entropy": 0.2388587210176225, "calibration/buffer_distribution_entropy": 0.7697787894619414, "calibration/confidence_entropy": 0.04604423048437065, "calibration/coverage@0%": 0.01215686274509804, "calibration/coverage@1%": 0.01215686274509804, "calibration/coverage@10%": 0.027936138882430444, "calibration/coverage@15%": 0.030681236921646128, "calibration/coverage@20%": 0.05222619536742088, "calibration/coverage@25%": 0.06124119862899948, "calibration/coverage@30%": 0.07420732719965457, "calibration/coverage@5%": 0.012549019607843137, "calibration/ece": 0.1059488157666342, "calibration/mean_confidence": 0.06527309548961124, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00244140625, "completions/max_length": 767.8, "completions/max_terminated_length": 767.8, "completions/mean_length": 117.16904296875, "completions/mean_terminated_length": 117.45823516845704, "completions/min_length": 0.0, "completions/min_terminated_length": 19.2, "epoch": 0.112, "grad_norm": 0.01868264377117157, "learning_rate": 1e-06, "loss": -0.0063, "num_tokens": 115895658.0, "reward": 0.9797860622406006, "reward_std": 0.16158705055713654, "rewards/accuracy_reward": 0.1146484375, "rewards/batch_coverage_0": 0.6942908763885498, "rewards/batch_coverage_1": 0.6942908763885498, "rewards/batch_coverage_10": 0.699905002117157, "rewards/batch_coverage_15": 0.7006630778312684, "rewards/batch_coverage_20": 0.6973462700843811, "rewards/batch_coverage_25": 0.6931140184402466, "rewards/batch_coverage_5": 0.6949953079223633, "rewards/brier_reward": 0.910808777809143, "rewards/confidence_uniqueness_reward": -0.6458814024925232, "rewards/format_reward": 0.995703125, "rewards/frontier_aurc_reward": -0.005381030216813088, "rewards/frontier_ece_reward": 0.008919767942279578, "rewards/frontier_entropy_batch_reward": -0.9016774773597718, "signal/accuracy_reward/centered_abs_mean": 0.1273193359375, "signal/accuracy_reward/group_std_mean": 0.16958228945732118, "signal/accuracy_reward/group_zero_std_frac": 0.515625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06365966796875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06365966796875, "signal/advantage_abs_mean": 0.10710721462965012, "signal/advantage_pre_scale_abs_mean": 0.10710721462965012, "signal/advantage_pre_scale_std": 0.21973832249641417, "signal/advantage_std": 0.21973832249641417, "signal/batch_coverage_0/centered_abs_mean": 0.13674386590719223, "signal/batch_coverage_0/group_std_mean": 0.20728526413440704, "signal/batch_coverage_0/group_zero_std_frac": 0.278125, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.013674386776983738, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.013674386776983738, "signal/batch_coverage_1/centered_abs_mean": 0.13674386590719223, "signal/batch_coverage_1/group_std_mean": 0.20728526413440704, "signal/batch_coverage_1/group_zero_std_frac": 0.278125, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.013674386776983738, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.013674386776983738, "signal/batch_coverage_10/centered_abs_mean": 0.13614047914743424, "signal/batch_coverage_10/group_std_mean": 0.20697369873523713, "signal/batch_coverage_10/group_zero_std_frac": 0.278125, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.013614048063755036, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.013614048063755036, "signal/batch_coverage_15/centered_abs_mean": 0.13347171396017074, "signal/batch_coverage_15/group_std_mean": 0.20461286902427672, "signal/batch_coverage_15/group_zero_std_frac": 0.278125, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.013347171433269977, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.013347171433269977, "signal/batch_coverage_20/centered_abs_mean": 0.13021294325590133, "signal/batch_coverage_20/group_std_mean": 0.2026127427816391, "signal/batch_coverage_20/group_zero_std_frac": 0.278125, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.013021294586360454, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.013021294586360454, "signal/batch_coverage_25/centered_abs_mean": 0.12717701941728593, "signal/batch_coverage_25/group_std_mean": 0.1998313844203949, "signal/batch_coverage_25/group_zero_std_frac": 0.278125, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.012717702239751816, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.012717702239751816, "signal/batch_coverage_5/centered_abs_mean": 0.13720745891332625, "signal/batch_coverage_5/group_std_mean": 0.20782139897346497, "signal/batch_coverage_5/group_zero_std_frac": 0.278125, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.013720746710896492, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.013720746710896492, "signal/brier_reward/centered_abs_mean": 0.12075443416833878, "signal/brier_reward/group_std_mean": 0.175453719496727, "signal/brier_reward/group_zero_std_frac": 0.278125, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012075443752110004, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012075443752110004, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.22815002501010895, "signal/confidence_uniqueness_reward/group_std_mean": 0.3402702987194061, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.334375, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.022815002501010893, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.022815002501010893, "signal/format_reward/centered_abs_mean": 0.00767822265625, "signal/format_reward/group_std_mean": 0.01842188462615013, "signal/format_reward/group_zero_std_frac": 0.90625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.003839111328125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.003839111328125, "signal/frontier_aurc_reward/centered_abs_mean": 0.0008029210846871137, "signal/frontier_aurc_reward/group_std_mean": 0.001404245011508465, "signal/frontier_aurc_reward/group_zero_std_frac": 0.05, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.003651377686765e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.003651377686765e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.01746743656694889, "signal/frontier_ece_reward/group_std_mean": 0.03470666408538818, "signal/frontier_ece_reward/group_zero_std_frac": 0.275, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0017467437544837594, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0017467437544837594, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.14744934737682341, "signal/frontier_entropy_batch_reward/group_std_mean": 0.23726414144039154, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.35625, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.014744934998452664, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.014744934998452664, "step": 35 }, { "calibration/aurc": 0.8121248519963785, "calibration/batch_distribution_entropy": 0.22457522420408763, "calibration/buffer_distribution_entropy": 0.7391312668108464, "calibration/confidence_entropy": 0.05101885456238644, "calibration/coverage@0%": 0.0027497207134327207, "calibration/coverage@1%": 0.0027497207134327207, "calibration/coverage@10%": 0.018466813051350207, "calibration/coverage@15%": 0.04158185770742347, "calibration/coverage@20%": 0.06825324052704215, "calibration/coverage@25%": 0.08276386917228877, "calibration/coverage@30%": 0.09334061110516817, "calibration/coverage@5%": 0.010608266882391464, "calibration/ece": 0.06685038780857414, "calibration/mean_confidence": 0.05442588489874869, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00224609375, "completions/max_length": 450.6, "completions/max_terminated_length": 450.6, "completions/mean_length": 130.52333984375, "completions/mean_terminated_length": 130.82366027832032, "completions/min_length": 0.0, "completions/min_terminated_length": 42.2, "epoch": 0.128, "grad_norm": 0.010998404584825039, "learning_rate": 1e-06, "loss": -0.0078, "num_tokens": 132148889.0, "reward": 0.9904319882392884, "reward_std": 0.1573132574558258, "rewards/accuracy_reward": 0.11435546875, "rewards/batch_coverage_0": 0.6873297572135926, "rewards/batch_coverage_1": 0.6873297572135926, "rewards/batch_coverage_10": 0.6906278967857361, "rewards/batch_coverage_15": 0.6921732783317566, "rewards/batch_coverage_20": 0.6892393708229065, "rewards/batch_coverage_25": 0.6890079140663147, "rewards/batch_coverage_5": 0.689215111732483, "rewards/brier_reward": 0.9234801173210144, "rewards/confidence_uniqueness_reward": -0.5425438076257706, "rewards/format_reward": 0.99609375, "rewards/frontier_aurc_reward": -0.0053546403534710406, "rewards/frontier_ece_reward": 0.007387215364724397, "rewards/frontier_entropy_batch_reward": -0.8605040788650513, "signal/accuracy_reward/centered_abs_mean": 0.117291259765625, "signal/accuracy_reward/group_std_mean": 0.15457678139209746, "signal/accuracy_reward/group_zero_std_frac": 0.565625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0586456298828125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0586456298828125, "signal/advantage_abs_mean": 0.10570785701274872, "signal/advantage_pre_scale_abs_mean": 0.10570785701274872, "signal/advantage_pre_scale_std": 0.21785781383514405, "signal/advantage_std": 0.21785781383514405, "signal/batch_coverage_0/centered_abs_mean": 0.10327832996845246, "signal/batch_coverage_0/group_std_mean": 0.16254315078258513, "signal/batch_coverage_0/group_zero_std_frac": 0.25, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.010327833332121372, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.010327833332121372, "signal/batch_coverage_1/centered_abs_mean": 0.10327832996845246, "signal/batch_coverage_1/group_std_mean": 0.16254315078258513, "signal/batch_coverage_1/group_zero_std_frac": 0.25, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.010327833332121372, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.010327833332121372, "signal/batch_coverage_10/centered_abs_mean": 0.10503756999969482, "signal/batch_coverage_10/group_std_mean": 0.16518883407115936, "signal/batch_coverage_10/group_zero_std_frac": 0.25, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.010503756627440453, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.010503756627440453, "signal/batch_coverage_15/centered_abs_mean": 0.10226047039031982, "signal/batch_coverage_15/group_std_mean": 0.16291693598031998, "signal/batch_coverage_15/group_zero_std_frac": 0.25, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.010226047411561013, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.010226047411561013, "signal/batch_coverage_20/centered_abs_mean": 0.10146261975169182, "signal/batch_coverage_20/group_std_mean": 0.1630474865436554, "signal/batch_coverage_20/group_zero_std_frac": 0.25, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.010146262310445308, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.010146262310445308, "signal/batch_coverage_25/centered_abs_mean": 0.09983718618750573, "signal/batch_coverage_25/group_std_mean": 0.1619284689426422, "signal/batch_coverage_25/group_zero_std_frac": 0.25, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.009983718767762184, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.009983718767762184, "signal/batch_coverage_5/centered_abs_mean": 0.10402362942695617, "signal/batch_coverage_5/group_std_mean": 0.16306559443473817, "signal/batch_coverage_5/group_zero_std_frac": 0.25, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.010402363166213036, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.010402363166213036, "signal/brier_reward/centered_abs_mean": 0.10254615992307663, "signal/brier_reward/group_std_mean": 0.1524132326245308, "signal/brier_reward/group_zero_std_frac": 0.259375, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010254616104066371, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010254616104066371, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.2588571161031723, "signal/confidence_uniqueness_reward/group_std_mean": 0.36650630831718445, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.3125, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02588571347296238, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.02588571347296238, "signal/format_reward/centered_abs_mean": 0.00748291015625, "signal/format_reward/group_std_mean": 0.020297119021415712, "signal/format_reward/group_zero_std_frac": 0.890625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.003741455078125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.003741455078125, "signal/frontier_aurc_reward/centered_abs_mean": 0.0009247717563994229, "signal/frontier_aurc_reward/group_std_mean": 0.0014879585476592183, "signal/frontier_aurc_reward/group_zero_std_frac": 0.121875, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.155964746430982e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.155964746430982e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.019005920551717282, "signal/frontier_ece_reward/group_std_mean": 0.0374838687479496, "signal/frontier_ece_reward/group_zero_std_frac": 0.271875, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0019005921203643084, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0019005921203643084, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.1939413219690323, "signal/frontier_entropy_batch_reward/group_std_mean": 0.2823514461517334, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.309375, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01939413193613291, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01939413193613291, "step": 40 }, { "calibration/aurc": 0.50818107219839, "calibration/batch_distribution_entropy": 0.5765797611568575, "calibration/buffer_distribution_entropy": 0.7184762427451847, "calibration/confidence_entropy": 0.15950569358905747, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.09775760184714125, "calibration/coverage@15%": 0.15666653621883925, "calibration/coverage@20%": 0.21749429764877912, "calibration/coverage@25%": 0.24952578011709087, "calibration/coverage@30%": 0.31821007469375323, "calibration/coverage@5%": 0.014624505928853754, "calibration/ece": 0.1193030803702988, "calibration/mean_confidence": 0.20761059397605647, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00859375, "completions/max_length": 610.6, "completions/max_terminated_length": 610.6, "completions/mean_length": 150.23427734375, "completions/mean_terminated_length": 151.54664611816406, "completions/min_length": 0.0, "completions/min_terminated_length": 56.4, "epoch": 0.144, "grad_norm": 0.007136950735002756, "learning_rate": 1e-06, "loss": -0.009, "num_tokens": 148637720.0, "reward": 1.0417076230049134, "reward_std": 0.2498043328523636, "rewards/accuracy_reward": 0.27431640625, "rewards/batch_coverage_0": 0.5050425410270691, "rewards/batch_coverage_1": 0.5050425410270691, "rewards/batch_coverage_10": 0.5385935366153717, "rewards/batch_coverage_15": 0.535876190662384, "rewards/batch_coverage_20": 0.5360523641109467, "rewards/batch_coverage_25": 0.5326862096786499, "rewards/batch_coverage_5": 0.5235825717449188, "rewards/brier_reward": 0.8559035062789917, "rewards/confidence_uniqueness_reward": 0.2509185492992401, "rewards/format_reward": 0.98984375, "rewards/frontier_aurc_reward": -0.0038861197885125876, "rewards/frontier_ece_reward": 0.01809915155172348, "rewards/frontier_entropy_batch_reward": -0.7050358533859253, "signal/accuracy_reward/centered_abs_mean": 0.185162353515625, "signal/accuracy_reward/group_std_mean": 0.2300116926431656, "signal/accuracy_reward/group_zero_std_frac": 0.4, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0925811767578125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0925811767578125, "signal/advantage_abs_mean": 0.18442415297031403, "signal/advantage_pre_scale_abs_mean": 0.18442415297031403, "signal/advantage_pre_scale_std": 0.28187933266162873, "signal/advantage_std": 0.28187933266162873, "signal/batch_coverage_0/centered_abs_mean": 0.19961527585983277, "signal/batch_coverage_0/group_std_mean": 0.2660757750272751, "signal/batch_coverage_0/group_zero_std_frac": 0.03125, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.01996152810752392, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.01996152810752392, "signal/batch_coverage_1/centered_abs_mean": 0.19961527585983277, "signal/batch_coverage_1/group_std_mean": 0.2660757750272751, "signal/batch_coverage_1/group_zero_std_frac": 0.03125, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.01996152810752392, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.01996152810752392, "signal/batch_coverage_10/centered_abs_mean": 0.20459548830986024, "signal/batch_coverage_10/group_std_mean": 0.2712189704179764, "signal/batch_coverage_10/group_zero_std_frac": 0.03125, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.020459549874067305, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.020459549874067305, "signal/batch_coverage_15/centered_abs_mean": 0.19978629052639008, "signal/batch_coverage_15/group_std_mean": 0.26815671324729917, "signal/batch_coverage_15/group_zero_std_frac": 0.03125, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.01997862905263901, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.01997862905263901, "signal/batch_coverage_20/centered_abs_mean": 0.19783964157104492, "signal/batch_coverage_20/group_std_mean": 0.26660596430301664, "signal/batch_coverage_20/group_zero_std_frac": 0.03125, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.019783964194357396, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.019783964194357396, "signal/batch_coverage_25/centered_abs_mean": 0.19625370502471923, "signal/batch_coverage_25/group_std_mean": 0.2650555014610291, "signal/batch_coverage_25/group_zero_std_frac": 0.03125, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.019625371135771275, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.019625371135771275, "signal/batch_coverage_5/centered_abs_mean": 0.20749586820602417, "signal/batch_coverage_5/group_std_mean": 0.27330430746078493, "signal/batch_coverage_5/group_zero_std_frac": 0.03125, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.02074958700686693, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.02074958700686693, "signal/brier_reward/centered_abs_mean": 0.16967381834983825, "signal/brier_reward/group_std_mean": 0.23079933822155, "signal/brier_reward/group_zero_std_frac": 0.03125, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016967381909489633, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.016967381909489633, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.3156610667705536, "signal/confidence_uniqueness_reward/group_std_mean": 0.3933307945728302, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.034375, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.031566106528043744, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.031566106528043744, "signal/format_reward/centered_abs_mean": 0.01905517578125, "signal/format_reward/group_std_mean": 0.04808509647846222, "signal/format_reward/group_zero_std_frac": 0.753125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.009527587890625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.009527587890625, "signal/frontier_aurc_reward/centered_abs_mean": 0.002761203283444047, "signal/frontier_aurc_reward/group_std_mean": 0.003953706519678235, "signal/frontier_aurc_reward/group_zero_std_frac": 0.00625, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.4515042352722956e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.4515042352722956e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.05492620766162872, "signal/frontier_ece_reward/group_std_mean": 0.08251123428344727, "signal/frontier_ece_reward/group_zero_std_frac": 0.034375, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005492620915174484, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005492620915174484, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.35702455043792725, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4454483091831207, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.05, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03570245616137981, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03570245616137981, "step": 45 }, { "calibration/aurc": 0.3948438466039467, "calibration/batch_distribution_entropy": 0.7543689498325424, "calibration/buffer_distribution_entropy": 0.7218805161850744, "calibration/confidence_entropy": 0.2475598733219218, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.029306930693069305, "calibration/coverage@20%": 0.04554455445544554, "calibration/coverage@25%": 0.23985486146123733, "calibration/coverage@30%": 0.38845830147412347, "calibration/coverage@5%": 0.0, "calibration/ece": 0.1511983118975085, "calibration/mean_confidence": 0.3666756006103048, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008984375, "completions/max_length": 529.8, "completions/max_terminated_length": 529.8, "completions/mean_length": 166.7224609375, "completions/mean_terminated_length": 168.22821655273438, "completions/min_length": 0.0, "completions/min_terminated_length": 68.0, "epoch": 0.16, "grad_norm": 0.002798353089019656, "learning_rate": 1e-06, "loss": -0.0092, "num_tokens": 165365886.0, "reward": 1.117294979095459, "reward_std": 0.26995018124580383, "rewards/accuracy_reward": 0.3697265625, "rewards/batch_coverage_0": 0.4271726250648499, "rewards/batch_coverage_1": 0.4271726250648499, "rewards/batch_coverage_10": 0.49650421142578127, "rewards/batch_coverage_15": 0.5068722724914551, "rewards/batch_coverage_20": 0.5134539842605591, "rewards/batch_coverage_25": 0.5107719004154205, "rewards/batch_coverage_5": 0.4713205575942993, "rewards/brier_reward": 0.8003980636596679, "rewards/confidence_uniqueness_reward": 0.7806328177452088, "rewards/format_reward": 0.990625, "rewards/frontier_aurc_reward": -0.00399963753297925, "rewards/frontier_ece_reward": 0.01983025260269642, "rewards/frontier_entropy_batch_reward": -0.5824374079704284, "signal/accuracy_reward/centered_abs_mean": 0.16785888671875, "signal/accuracy_reward/group_std_mean": 0.21611510515213012, "signal/accuracy_reward/group_zero_std_frac": 0.403125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.083929443359375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.083929443359375, "signal/advantage_abs_mean": 0.20416966676712037, "signal/advantage_pre_scale_abs_mean": 0.20416966676712037, "signal/advantage_pre_scale_std": 0.28179879784584044, "signal/advantage_std": 0.28179879784584044, "signal/batch_coverage_0/centered_abs_mean": 0.24080052077770234, "signal/batch_coverage_0/group_std_mean": 0.3102306604385376, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.024080053344368933, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.024080053344368933, "signal/batch_coverage_1/centered_abs_mean": 0.24080052077770234, "signal/batch_coverage_1/group_std_mean": 0.3102306604385376, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.024080053344368933, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.024080053344368933, "signal/batch_coverage_10/centered_abs_mean": 0.2546322673559189, "signal/batch_coverage_10/group_std_mean": 0.32426918745040895, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.025463227182626724, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.025463227182626724, "signal/batch_coverage_15/centered_abs_mean": 0.25954718291759493, "signal/batch_coverage_15/group_std_mean": 0.32985265254974366, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.02595471851527691, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.02595471851527691, "signal/batch_coverage_20/centered_abs_mean": 0.2642736345529556, "signal/batch_coverage_20/group_std_mean": 0.3358638346195221, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.026427363604307176, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.026427363604307176, "signal/batch_coverage_25/centered_abs_mean": 0.2585195034742355, "signal/batch_coverage_25/group_std_mean": 0.3294057846069336, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.02585195079445839, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.02585195079445839, "signal/batch_coverage_5/centered_abs_mean": 0.2501774102449417, "signal/batch_coverage_5/group_std_mean": 0.3195813298225403, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.025017741695046426, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.025017741695046426, "signal/brier_reward/centered_abs_mean": 0.20010380446910858, "signal/brier_reward/group_std_mean": 0.2640431046485901, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020010380446910857, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.020010380446910857, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.14535830169916153, "signal/confidence_uniqueness_reward/group_std_mean": 0.1824635833501816, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.014535830728709698, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.014535830728709698, "signal/format_reward/centered_abs_mean": 0.01787109375, "signal/format_reward/group_std_mean": 0.04609687626361847, "signal/format_reward/group_zero_std_frac": 0.7625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.008935546875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.008935546875, "signal/frontier_aurc_reward/centered_abs_mean": 0.00380462440662086, "signal/frontier_aurc_reward/group_std_mean": 0.005796490237116814, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.755780755658634e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.755780755658634e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.07341517955064773, "signal/frontier_ece_reward/group_std_mean": 0.101849465072155, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.007341517694294452, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.007341517694294452, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.4398982286453247, "signal/frontier_entropy_batch_reward/group_std_mean": 0.5136456608772277, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.043989823758602144, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.043989823758602144, "step": 50 }, { "epoch": 0.16, "eval_calibration/aurc": 0.5988549197356936, "eval_calibration/batch_distribution_entropy": 0.6807624415706247, "eval_calibration/buffer_distribution_entropy": 0.7286592836154312, "eval_calibration/confidence_entropy": 0.259052853462329, "eval_calibration/coverage@0%": 0.0390625, "eval_calibration/coverage@1%": 0.0390625, "eval_calibration/coverage@10%": 0.0390625, "eval_calibration/coverage@15%": 0.0390625, "eval_calibration/coverage@20%": 0.078125, "eval_calibration/coverage@25%": 0.078125, "eval_calibration/coverage@30%": 0.0859375, "eval_calibration/coverage@5%": 0.0390625, "eval_calibration/ece": 0.2969021597053368, "eval_calibration/mean_confidence": 0.4876565724405962, "eval_completions/clipped_ratio": 0.00390625, "eval_completions/max_length": 358.5, "eval_completions/max_terminated_length": 358.5, "eval_completions/mean_length": 174.25215530395508, "eval_completions/mean_terminated_length": 174.94443893432617, "eval_completions/min_length": 45.75, "eval_completions/min_terminated_length": 94.5, "eval_loss": 0.0, "eval_num_tokens": 165365886.0, "eval_reward": 0.9293602705001831, "eval_reward_std": 0.29658710211515427, "eval_rewards/accuracy_reward": 0.3125, "eval_rewards/batch_coverage_0": 0.3174902945756912, "eval_rewards/batch_coverage_1": 0.3174902945756912, "eval_rewards/batch_coverage_10": 0.3108842074871063, "eval_rewards/batch_coverage_15": 0.30682356655597687, "eval_rewards/batch_coverage_20": 0.29854603856801987, "eval_rewards/batch_coverage_25": 0.29226575046777725, "eval_rewards/batch_coverage_5": 0.3174902945756912, "eval_rewards/brier_reward": 0.7489841133356094, "eval_rewards/confidence_uniqueness_reward": 0.8224703967571259, "eval_rewards/format_reward": 0.99609375, "eval_rewards/frontier_aurc_reward": -0.005349113023839891, "eval_rewards/frontier_ece_reward": 0.014951157238101587, "eval_rewards/frontier_entropy_batch_reward": -0.99609375, "eval_runtime": 37.8213, "eval_samples_per_second": 13.22, "eval_signal/accuracy_reward/centered_abs_mean": 0.416015625, "eval_signal/accuracy_reward/group_std_mean": 0.46149376779794693, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2080078125, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2080078125, "eval_signal/advantage_abs_mean": 0.25766993314027786, "eval_signal/advantage_pre_scale_abs_mean": 0.25766993314027786, "eval_signal/advantage_pre_scale_std": 0.29480960220098495, "eval_signal/advantage_std": 0.29480960220098495, "eval_signal/batch_coverage_0/centered_abs_mean": 0.4219600185751915, "eval_signal/batch_coverage_0/group_std_mean": 0.48065635561943054, "eval_signal/batch_coverage_0/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.04219600185751915, "eval_signal/batch_coverage_0/weight": 0.10000000149011612, "eval_signal/batch_coverage_0/weighted_centered_abs_mean": 0.04219600185751915, "eval_signal/batch_coverage_1/centered_abs_mean": 0.4219600185751915, "eval_signal/batch_coverage_1/group_std_mean": 0.48065635561943054, "eval_signal/batch_coverage_1/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.04219600185751915, "eval_signal/batch_coverage_1/weight": 0.10000000149011612, "eval_signal/batch_coverage_1/weighted_centered_abs_mean": 0.04219600185751915, "eval_signal/batch_coverage_10/centered_abs_mean": 0.41396255046129227, "eval_signal/batch_coverage_10/group_std_mean": 0.4719335660338402, "eval_signal/batch_coverage_10/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.04139625560492277, "eval_signal/batch_coverage_10/weight": 0.10000000149011612, "eval_signal/batch_coverage_10/weighted_centered_abs_mean": 0.04139625560492277, "eval_signal/batch_coverage_15/centered_abs_mean": 0.4079297110438347, "eval_signal/batch_coverage_15/group_std_mean": 0.46547579765319824, "eval_signal/batch_coverage_15/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.04079297184944153, "eval_signal/batch_coverage_15/weight": 0.10000000149011612, "eval_signal/batch_coverage_15/weighted_centered_abs_mean": 0.04079297184944153, "eval_signal/batch_coverage_20/centered_abs_mean": 0.3940034508705139, "eval_signal/batch_coverage_20/group_std_mean": 0.4500989094376564, "eval_signal/batch_coverage_20/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.039400345645844936, "eval_signal/batch_coverage_20/weight": 0.10000000149011612, "eval_signal/batch_coverage_20/weighted_centered_abs_mean": 0.039400345645844936, "eval_signal/batch_coverage_25/centered_abs_mean": 0.3857416883111, "eval_signal/batch_coverage_25/group_std_mean": 0.4417581185698509, "eval_signal/batch_coverage_25/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.03857417032122612, "eval_signal/batch_coverage_25/weight": 0.10000000149011612, "eval_signal/batch_coverage_25/weighted_centered_abs_mean": 0.03857417032122612, "eval_signal/batch_coverage_5/centered_abs_mean": 0.4219600185751915, "eval_signal/batch_coverage_5/group_std_mean": 0.48065635561943054, "eval_signal/batch_coverage_5/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.04219600185751915, "eval_signal/batch_coverage_5/weight": 0.10000000149011612, "eval_signal/batch_coverage_5/weighted_centered_abs_mean": 0.04219600185751915, "eval_signal/brier_reward/centered_abs_mean": 0.3073003217577934, "eval_signal/brier_reward/group_std_mean": 0.3613938093185425, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.030730033293366432, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.030730033293366432, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.10804207064211369, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.12843790277838707, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.010804207297042012, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.010804207297042012, "eval_signal/format_reward/centered_abs_mean": 0.007568359375, "eval_signal/format_reward/group_std_mean": 0.022097086533904076, "eval_signal/format_reward/group_zero_std_frac": 0.875, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0037841796875, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0037841796875, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.005103390081785619, "eval_signal/frontier_aurc_reward/group_std_mean": 0.006918843020685017, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.379238038789481e-05, "eval_signal/frontier_aurc_reward/weight": 0.012500000186264515, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.379238038789481e-05, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.0694188978523016, "eval_signal/frontier_ece_reward/group_std_mean": 0.10465933568775654, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006941889994777739, "eval_signal/frontier_ece_reward/weight": 0.10000000149011612, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006941889994777739, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.007568359375, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.022097086533904076, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.875, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0007568359724245965, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0007568359724245965, "eval_steps_per_second": 0.106, "step": 50 }, { "calibration/aurc": 0.4378798248603018, "calibration/batch_distribution_entropy": 0.7893052997827892, "calibration/buffer_distribution_entropy": 0.7332999397532869, "calibration/confidence_entropy": 0.2882561287184676, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.05731225296442688, "calibration/coverage@30%": 0.11750360482656066, "calibration/coverage@5%": 0.0, "calibration/ece": 0.21578943909506018, "calibration/mean_confidence": 0.5581830455108985, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00439453125, "completions/max_length": 775.8, "completions/max_terminated_length": 775.8, "completions/mean_length": 175.36943359375, "completions/mean_terminated_length": 176.1399139404297, "completions/min_length": 0.0, "completions/min_terminated_length": 75.2, "epoch": 0.176, "grad_norm": 0.0023718716111034155, "learning_rate": 1e-06, "loss": -0.0052, "num_tokens": 182398789.0, "reward": 1.0906760931015014, "reward_std": 0.24937661588191987, "rewards/accuracy_reward": 0.38095703125, "rewards/batch_coverage_0": 0.35660014152526853, "rewards/batch_coverage_1": 0.35660014152526853, "rewards/batch_coverage_10": 0.4325276672840118, "rewards/batch_coverage_15": 0.4449883341789246, "rewards/batch_coverage_20": 0.4588911831378937, "rewards/batch_coverage_25": 0.463850337266922, "rewards/batch_coverage_5": 0.3945730209350586, "rewards/brier_reward": 0.7669905066490174, "rewards/confidence_uniqueness_reward": 0.8827269434928894, "rewards/format_reward": 0.9951171875, "rewards/frontier_aurc_reward": -0.004561876039952039, "rewards/frontier_ece_reward": 0.014202152192592622, "rewards/frontier_entropy_batch_reward": -0.5449908971786499, "signal/accuracy_reward/centered_abs_mean": 0.173187255859375, "signal/accuracy_reward/group_std_mean": 0.21781871914863588, "signal/accuracy_reward/group_zero_std_frac": 0.421875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0865936279296875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0865936279296875, "signal/advantage_abs_mean": 0.19311022162437438, "signal/advantage_pre_scale_abs_mean": 0.19311022162437438, "signal/advantage_pre_scale_std": 0.262198868393898, "signal/advantage_std": 0.262198868393898, "signal/batch_coverage_0/centered_abs_mean": 0.20709326565265657, "signal/batch_coverage_0/group_std_mean": 0.2733163952827454, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.02070932649075985, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.02070932649075985, "signal/batch_coverage_1/centered_abs_mean": 0.20709326565265657, "signal/batch_coverage_1/group_std_mean": 0.2733163952827454, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.02070932649075985, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.02070932649075985, "signal/batch_coverage_10/centered_abs_mean": 0.2269432097673416, "signal/batch_coverage_10/group_std_mean": 0.2966294169425964, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.02269432060420513, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.02269432060420513, "signal/batch_coverage_15/centered_abs_mean": 0.23294417560100555, "signal/batch_coverage_15/group_std_mean": 0.30323171615600586, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.023294418677687646, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.023294418677687646, "signal/batch_coverage_20/centered_abs_mean": 0.24196192026138305, "signal/batch_coverage_20/group_std_mean": 0.313522207736969, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.024196192249655725, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.024196192249655725, "signal/batch_coverage_25/centered_abs_mean": 0.24587258994579314, "signal/batch_coverage_25/group_std_mean": 0.3179736495018005, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.02458725869655609, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.02458725869655609, "signal/batch_coverage_5/centered_abs_mean": 0.21568080186843872, "signal/batch_coverage_5/group_std_mean": 0.28273540139198305, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.02156808041036129, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.02156808041036129, "signal/brier_reward/centered_abs_mean": 0.20453362464904784, "signal/brier_reward/group_std_mean": 0.2597156286239624, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020453362539410592, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.020453362539410592, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.08080095127224922, "signal/confidence_uniqueness_reward/group_std_mean": 0.10371316969394684, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00808009523898363, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00808009523898363, "signal/format_reward/centered_abs_mean": 0.00933837890625, "signal/format_reward/group_std_mean": 0.02481246441602707, "signal/format_reward/group_zero_std_frac": 0.86875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.004669189453125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.004669189453125, "signal/frontier_aurc_reward/centered_abs_mean": 0.0035921338479965926, "signal/frontier_aurc_reward/group_std_mean": 0.005274048540741205, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.490167411859147e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.490167411859147e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.06365836337208748, "signal/frontier_ece_reward/group_std_mean": 0.08838188350200653, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006365836411714554, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006365836411714554, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.4541351854801178, "signal/frontier_entropy_batch_reward/group_std_mean": 0.5231335401535034, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.04541352093219757, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.04541352093219757, "step": 55 }, { "calibration/aurc": 0.3748090180300953, "calibration/batch_distribution_entropy": 0.7953933340069766, "calibration/buffer_distribution_entropy": 0.7446203141187778, "calibration/confidence_entropy": 0.3067337279445185, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.007827788649706457, "calibration/coverage@15%": 0.022309197651663403, "calibration/coverage@20%": 0.10722770914872799, "calibration/coverage@25%": 0.18578155577299413, "calibration/coverage@30%": 0.3459637964774951, "calibration/coverage@5%": 0.0, "calibration/ece": 0.15640222008453614, "calibration/mean_confidence": 0.5451544081436507, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00107421875, "completions/max_length": 566.4, "completions/max_terminated_length": 566.4, "completions/mean_length": 182.34873046875, "completions/mean_terminated_length": 182.54359436035156, "completions/min_length": 17.6, "completions/min_terminated_length": 79.8, "epoch": 0.192, "grad_norm": 0.002586693037301302, "learning_rate": 1e-06, "loss": -0.0007, "num_tokens": 199080856.0, "reward": 1.143661379814148, "reward_std": 0.23088187277317046, "rewards/accuracy_reward": 0.41591796875, "rewards/batch_coverage_0": 0.3952768325805664, "rewards/batch_coverage_1": 0.3952768325805664, "rewards/batch_coverage_10": 0.47167777419090273, "rewards/batch_coverage_15": 0.4818853437900543, "rewards/batch_coverage_20": 0.48628310561180116, "rewards/batch_coverage_25": 0.49131479263305666, "rewards/batch_coverage_5": 0.44672444462776184, "rewards/brier_reward": 0.7825680017471314, "rewards/confidence_uniqueness_reward": 0.911488664150238, "rewards/format_reward": 0.9984375, "rewards/frontier_aurc_reward": -0.00404461151920259, "rewards/frontier_ece_reward": 0.01575819170102477, "rewards/frontier_entropy_batch_reward": -0.512912106513977, "signal/accuracy_reward/centered_abs_mean": 0.144512939453125, "signal/accuracy_reward/group_std_mean": 0.19226216971874238, "signal/accuracy_reward/group_zero_std_frac": 0.446875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0722564697265625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0722564697265625, "signal/advantage_abs_mean": 0.1781569391489029, "signal/advantage_pre_scale_abs_mean": 0.1781569391489029, "signal/advantage_pre_scale_std": 0.24461511373519898, "signal/advantage_std": 0.24461511373519898, "signal/batch_coverage_0/centered_abs_mean": 0.19959236383438111, "signal/batch_coverage_0/group_std_mean": 0.25877436995506287, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.019959235936403273, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.019959235936403273, "signal/batch_coverage_1/centered_abs_mean": 0.19959236383438111, "signal/batch_coverage_1/group_std_mean": 0.25877436995506287, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.019959235936403273, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.019959235936403273, "signal/batch_coverage_10/centered_abs_mean": 0.22316555082798004, "signal/batch_coverage_10/group_std_mean": 0.2864723980426788, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.022316556051373483, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.022316556051373483, "signal/batch_coverage_15/centered_abs_mean": 0.22717729210853577, "signal/batch_coverage_15/group_std_mean": 0.2909119248390198, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.022717729210853577, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.022717729210853577, "signal/batch_coverage_20/centered_abs_mean": 0.22871865928173066, "signal/batch_coverage_20/group_std_mean": 0.2934320390224457, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.02287186644971371, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.02287186644971371, "signal/batch_coverage_25/centered_abs_mean": 0.23193451762199402, "signal/batch_coverage_25/group_std_mean": 0.2979011297225952, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.0231934517621994, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.0231934517621994, "signal/batch_coverage_5/centered_abs_mean": 0.2138021856546402, "signal/batch_coverage_5/group_std_mean": 0.2738446056842804, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.021380218863487243, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.021380218863487243, "signal/brier_reward/centered_abs_mean": 0.18481292128562926, "signal/brier_reward/group_std_mean": 0.2386064797639847, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018481292203068732, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.018481292203068732, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.05636930540204048, "signal/confidence_uniqueness_reward/group_std_mean": 0.07167273461818695, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005636930651962757, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005636930651962757, "signal/format_reward/centered_abs_mean": 0.00301513671875, "signal/format_reward/group_std_mean": 0.008502526115626097, "signal/format_reward/group_zero_std_frac": 0.953125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.001507568359375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.001507568359375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0027934785932302473, "signal/frontier_aurc_reward/group_std_mean": 0.0041437826585024595, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.49184814695036e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.49184814695036e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.05286612808704376, "signal/frontier_ece_reward/group_std_mean": 0.07452991306781769, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0052866128273308275, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0052866128273308275, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.4506451427936554, "signal/frontier_entropy_batch_reward/group_std_mean": 0.5177243947982788, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.045064514875411986, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.045064514875411986, "step": 60 }, { "calibration/aurc": 0.31193953094255866, "calibration/batch_distribution_entropy": 0.809071369900361, "calibration/buffer_distribution_entropy": 0.753271732079433, "calibration/confidence_entropy": 0.3125224990978349, "calibration/coverage@0%": 0.005870841487279843, "calibration/coverage@1%": 0.005870841487279843, "calibration/coverage@10%": 0.15184839774951076, "calibration/coverage@15%": 0.176879739481409, "calibration/coverage@20%": 0.19330815802348336, "calibration/coverage@25%": 0.465513195004029, "calibration/coverage@30%": 0.5644500253012164, "calibration/coverage@5%": 0.01487279843444227, "calibration/ece": 0.13437654194089554, "calibration/mean_confidence": 0.52754010562185, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00048828125, "completions/max_length": 764.4, "completions/max_terminated_length": 764.4, "completions/mean_length": 188.24453125, "completions/mean_terminated_length": 188.33706665039062, "completions/min_length": 17.4, "completions/min_terminated_length": 81.2, "epoch": 0.208, "grad_norm": 0.0021228559780865908, "learning_rate": 1e-06, "loss": -0.0002, "num_tokens": 216040704.0, "reward": 1.1614436149597167, "reward_std": 0.22528802156448363, "rewards/accuracy_reward": 0.45263671875, "rewards/batch_coverage_0": 0.37199272513389586, "rewards/batch_coverage_1": 0.37199272513389586, "rewards/batch_coverage_10": 0.46476385593414304, "rewards/batch_coverage_15": 0.47402478456497193, "rewards/batch_coverage_20": 0.47720582485198976, "rewards/batch_coverage_25": 0.4847422957420349, "rewards/batch_coverage_5": 0.42998725175857544, "rewards/brier_reward": 0.8034541726112365, "rewards/confidence_uniqueness_reward": 0.9194929838180542, "rewards/format_reward": 0.9990234375, "rewards/frontier_aurc_reward": -0.0033801186364144087, "rewards/frontier_ece_reward": 0.01981247924268246, "rewards/frontier_entropy_batch_reward": -0.460911363363266, "signal/accuracy_reward/centered_abs_mean": 0.157464599609375, "signal/accuracy_reward/group_std_mean": 0.20806764662265778, "signal/accuracy_reward/group_zero_std_frac": 0.39375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0787322998046875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0787322998046875, "signal/advantage_abs_mean": 0.17317451536655426, "signal/advantage_pre_scale_abs_mean": 0.17317451536655426, "signal/advantage_pre_scale_std": 0.23805280029773712, "signal/advantage_std": 0.23805280029773712, "signal/batch_coverage_0/centered_abs_mean": 0.20832762718200684, "signal/batch_coverage_0/group_std_mean": 0.26984869241714476, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.020832763239741325, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.020832763239741325, "signal/batch_coverage_1/centered_abs_mean": 0.20832762718200684, "signal/batch_coverage_1/group_std_mean": 0.26984869241714476, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.020832763239741325, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.020832763239741325, "signal/batch_coverage_10/centered_abs_mean": 0.2282239854335785, "signal/batch_coverage_10/group_std_mean": 0.2933039665222168, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.02282239906489849, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.02282239906489849, "signal/batch_coverage_15/centered_abs_mean": 0.2308487683534622, "signal/batch_coverage_15/group_std_mean": 0.296176677942276, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.02308487668633461, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.02308487668633461, "signal/batch_coverage_20/centered_abs_mean": 0.22977923154830932, "signal/batch_coverage_20/group_std_mean": 0.29552650451660156, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.022977923229336738, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.022977923229336738, "signal/batch_coverage_25/centered_abs_mean": 0.23684509396553038, "signal/batch_coverage_25/group_std_mean": 0.30428668260574343, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.023684510216116905, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.023684510216116905, "signal/batch_coverage_5/centered_abs_mean": 0.21925785541534423, "signal/batch_coverage_5/group_std_mean": 0.2825684487819672, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.021925785392522813, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.021925785392522813, "signal/brier_reward/centered_abs_mean": 0.1792667180299759, "signal/brier_reward/group_std_mean": 0.2295171409845352, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017926672659814357, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.017926672659814357, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.051846512407064435, "signal/confidence_uniqueness_reward/group_std_mean": 0.06691792458295823, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0051846509799361225, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0051846509799361225, "signal/format_reward/centered_abs_mean": 0.0018798828125, "signal/format_reward/group_std_mean": 0.005187963182106614, "signal/format_reward/group_zero_std_frac": 0.971875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00093994140625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00093994140625, "signal/frontier_aurc_reward/centered_abs_mean": 0.0023344816174358128, "signal/frontier_aurc_reward/group_std_mean": 0.003417077288031578, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.9181021091062576e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.9181021091062576e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.04670820683240891, "signal/frontier_ece_reward/group_std_mean": 0.06903419941663742, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004670820478349924, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004670820478349924, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.4435045063495636, "signal/frontier_entropy_batch_reward/group_std_mean": 0.5081867694854736, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.003125, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.044350451231002806, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.044350451231002806, "step": 65 }, { "calibration/aurc": 0.3552052316324844, "calibration/batch_distribution_entropy": 0.8085204306568803, "calibration/buffer_distribution_entropy": 0.7622269030192862, "calibration/confidence_entropy": 0.30718985568153173, "calibration/coverage@0%": 0.004705882352941176, "calibration/coverage@1%": 0.004705882352941176, "calibration/coverage@10%": 0.10938797436782932, "calibration/coverage@15%": 0.16579870304286098, "calibration/coverage@20%": 0.32641418211120066, "calibration/coverage@25%": 0.41727178542649934, "calibration/coverage@30%": 0.49365949119373775, "calibration/coverage@5%": 0.02, "calibration/ece": 0.13717859469131563, "calibration/mean_confidence": 0.41138431787069046, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0021484375, "completions/max_length": 635.8, "completions/max_terminated_length": 635.8, "completions/mean_length": 189.36298828125, "completions/mean_terminated_length": 189.77273254394532, "completions/min_length": 0.0, "completions/min_terminated_length": 86.8, "epoch": 0.224, "grad_norm": 0.0016844564815983176, "learning_rate": 1e-06, "loss": -0.0019, "num_tokens": 233132965.0, "reward": 1.1587292671203613, "reward_std": 0.21179547905921936, "rewards/accuracy_reward": 0.39658203125, "rewards/batch_coverage_0": 0.43869972229003906, "rewards/batch_coverage_1": 0.43869972229003906, "rewards/batch_coverage_10": 0.5035058617591858, "rewards/batch_coverage_15": 0.5092388391494751, "rewards/batch_coverage_20": 0.5123389482498169, "rewards/batch_coverage_25": 0.5107143759727478, "rewards/batch_coverage_5": 0.4755396842956543, "rewards/brier_reward": 0.8166243195533752, "rewards/confidence_uniqueness_reward": 0.8880064725875855, "rewards/format_reward": 0.9974609375, "rewards/frontier_aurc_reward": -0.003564742440357804, "rewards/frontier_ece_reward": 0.01565170958638191, "rewards/frontier_entropy_batch_reward": -0.49149676561355593, "signal/accuracy_reward/centered_abs_mean": 0.147259521484375, "signal/accuracy_reward/group_std_mean": 0.19043090045452118, "signal/accuracy_reward/group_zero_std_frac": 0.475, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0736297607421875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0736297607421875, "signal/advantage_abs_mean": 0.16093077659606933, "signal/advantage_pre_scale_abs_mean": 0.16093077659606933, "signal/advantage_pre_scale_std": 0.22910359501838684, "signal/advantage_std": 0.22910359501838684, "signal/batch_coverage_0/centered_abs_mean": 0.19967867136001588, "signal/batch_coverage_0/group_std_mean": 0.25975680351257324, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.01996786817908287, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.01996786817908287, "signal/batch_coverage_1/centered_abs_mean": 0.19967867136001588, "signal/batch_coverage_1/group_std_mean": 0.25975680351257324, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.01996786817908287, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.01996786817908287, "signal/batch_coverage_10/centered_abs_mean": 0.21668496429920198, "signal/batch_coverage_10/group_std_mean": 0.2810732364654541, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.02166849635541439, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.02166849635541439, "signal/batch_coverage_15/centered_abs_mean": 0.21663637459278107, "signal/batch_coverage_15/group_std_mean": 0.28133258819580076, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.021663638204336165, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.021663638204336165, "signal/batch_coverage_20/centered_abs_mean": 0.21716900467872619, "signal/batch_coverage_20/group_std_mean": 0.28218746185302734, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.021716900169849396, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.021716900169849396, "signal/batch_coverage_25/centered_abs_mean": 0.21201648116111754, "signal/batch_coverage_25/group_std_mean": 0.27759831547737124, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.02120164819061756, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.02120164819061756, "signal/batch_coverage_5/centered_abs_mean": 0.20889467895030975, "signal/batch_coverage_5/group_std_mean": 0.27032582759857177, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.02088946886360645, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.02088946886360645, "signal/brier_reward/centered_abs_mean": 0.1668454587459564, "signal/brier_reward/group_std_mean": 0.218032369017601, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01668454669415951, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01668454669415951, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.07639306187629699, "signal/confidence_uniqueness_reward/group_std_mean": 0.09777989089488984, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0076393064111471174, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0076393064111471174, "signal/format_reward/centered_abs_mean": 0.00484619140625, "signal/format_reward/group_std_mean": 0.012622351385653019, "signal/format_reward/group_zero_std_frac": 0.934375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.002423095703125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.002423095703125, "signal/frontier_aurc_reward/centered_abs_mean": 0.002311847684904933, "signal/frontier_aurc_reward/group_std_mean": 0.003476549405604601, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.8898097298224457e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.8898097298224457e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.039065159112215045, "signal/frontier_ece_reward/group_std_mean": 0.05721670910716057, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.003906515752896666, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.003906515752896666, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.4242829144001007, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4961832225322723, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.042428291589021686, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.042428291589021686, "step": 70 }, { "calibration/aurc": 0.39189889727631827, "calibration/batch_distribution_entropy": 0.8065487656414344, "calibration/buffer_distribution_entropy": 0.7683577524517988, "calibration/confidence_entropy": 0.30581510958839314, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.145703125, "calibration/coverage@15%": 0.1953125, "calibration/coverage@20%": 0.253515625, "calibration/coverage@25%": 0.291015625, "calibration/coverage@30%": 0.346484375, "calibration/coverage@5%": 0.094921875, "calibration/ece": 0.16096194237733213, "calibration/mean_confidence": 0.4557611454920851, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00048828125, "completions/max_length": 727.8, "completions/max_terminated_length": 727.8, "completions/mean_length": 193.5591796875, "completions/mean_terminated_length": 193.65261840820312, "completions/min_length": 32.8, "completions/min_terminated_length": 84.2, "epoch": 0.24, "grad_norm": 0.002140032360330224, "learning_rate": 1e-06, "loss": 0.0015, "num_tokens": 250366691.0, "reward": 1.1608482837677, "reward_std": 0.2237977057695389, "rewards/accuracy_reward": 0.44482421875, "rewards/batch_coverage_0": 0.40374218821525576, "rewards/batch_coverage_1": 0.40374218821525576, "rewards/batch_coverage_10": 0.4652402937412262, "rewards/batch_coverage_15": 0.4752448081970215, "rewards/batch_coverage_20": 0.48339633345603944, "rewards/batch_coverage_25": 0.4878033041954041, "rewards/batch_coverage_5": 0.43672565221786497, "rewards/brier_reward": 0.8111422181129455, "rewards/confidence_uniqueness_reward": 0.8884037733078003, "rewards/format_reward": 0.9994140625, "rewards/frontier_aurc_reward": -0.0031261141411960125, "rewards/frontier_ece_reward": 0.018053279910236596, "rewards/frontier_entropy_batch_reward": -0.48581200242042544, "signal/accuracy_reward/centered_abs_mean": 0.176092529296875, "signal/accuracy_reward/group_std_mean": 0.22773178815841674, "signal/accuracy_reward/group_zero_std_frac": 0.371875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0880462646484375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0880462646484375, "signal/advantage_abs_mean": 0.17269106805324555, "signal/advantage_pre_scale_abs_mean": 0.17269106805324555, "signal/advantage_pre_scale_std": 0.2400616377592087, "signal/advantage_std": 0.2400616377592087, "signal/batch_coverage_0/centered_abs_mean": 0.21278543770313263, "signal/batch_coverage_0/group_std_mean": 0.2748673528432846, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.021278544515371322, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.021278544515371322, "signal/batch_coverage_1/centered_abs_mean": 0.21278543770313263, "signal/batch_coverage_1/group_std_mean": 0.2748673528432846, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.021278544515371322, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.021278544515371322, "signal/batch_coverage_10/centered_abs_mean": 0.2333226978778839, "signal/batch_coverage_10/group_std_mean": 0.2996509253978729, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.023332270979881286, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.023332270979881286, "signal/batch_coverage_15/centered_abs_mean": 0.23160504698753356, "signal/batch_coverage_15/group_std_mean": 0.2971563279628754, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.023160504922270776, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.023160504922270776, "signal/batch_coverage_20/centered_abs_mean": 0.23060874938964843, "signal/batch_coverage_20/group_std_mean": 0.29717103838920594, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.02306087501347065, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.02306087501347065, "signal/batch_coverage_25/centered_abs_mean": 0.23110208213329314, "signal/batch_coverage_25/group_std_mean": 0.2988200902938843, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.02311020828783512, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.02311020828783512, "signal/batch_coverage_5/centered_abs_mean": 0.22297326028347014, "signal/batch_coverage_5/group_std_mean": 0.2872433841228485, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.02229732573032379, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.02229732573032379, "signal/brier_reward/centered_abs_mean": 0.17840671837329863, "signal/brier_reward/group_std_mean": 0.2301701694726944, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01784067116677761, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01784067116677761, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.07509338706731797, "signal/confidence_uniqueness_reward/group_std_mean": 0.09538438469171524, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0075093389488756655, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0075093389488756655, "signal/format_reward/centered_abs_mean": 0.00113525390625, "signal/format_reward/group_std_mean": 0.0033145629335194827, "signal/format_reward/group_zero_std_frac": 0.98125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000567626953125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000567626953125, "signal/frontier_aurc_reward/centered_abs_mean": 0.0025602192617952824, "signal/frontier_aurc_reward/group_std_mean": 0.003933770721778274, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.200274150003679e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.200274150003679e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.040749994665384294, "signal/frontier_ece_reward/group_std_mean": 0.06029489189386368, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00407499959692359, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00407499959692359, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.43094528913497926, "signal/frontier_entropy_batch_reward/group_std_mean": 0.5003817081451416, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.003125, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.04309452995657921, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.04309452995657921, "step": 75 }, { "calibration/aurc": 0.34841073189177924, "calibration/batch_distribution_entropy": 0.7404371928462201, "calibration/buffer_distribution_entropy": 0.7727618186889185, "calibration/confidence_entropy": 0.26339871275418164, "calibration/coverage@0%": 0.01796875, "calibration/coverage@1%": 0.01796875, "calibration/coverage@10%": 0.232421875, "calibration/coverage@15%": 0.30234375, "calibration/coverage@20%": 0.35078125, "calibration/coverage@25%": 0.3921875, "calibration/coverage@30%": 0.44260258683953035, "calibration/coverage@5%": 0.061328125, "calibration/ece": 0.1280873174322534, "calibration/mean_confidence": 0.4740620152862841, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0005859375, "completions/max_length": 793.2, "completions/max_terminated_length": 793.2, "completions/mean_length": 187.65595703125, "completions/mean_terminated_length": 187.76578369140626, "completions/min_length": 33.8, "completions/min_terminated_length": 82.0, "epoch": 0.256, "grad_norm": 0.003614980261772871, "learning_rate": 1e-06, "loss": 0.0001, "num_tokens": 267343104.0, "reward": 1.175432276725769, "reward_std": 0.20357095897197724, "rewards/accuracy_reward": 0.4142578125, "rewards/batch_coverage_0": 0.4698097288608551, "rewards/batch_coverage_1": 0.4698097288608551, "rewards/batch_coverage_10": 0.5149709939956665, "rewards/batch_coverage_15": 0.5213613927364349, "rewards/batch_coverage_20": 0.5291022181510925, "rewards/batch_coverage_25": 0.5314548969268799, "rewards/batch_coverage_5": 0.4935650169849396, "rewards/brier_reward": 0.8144651532173157, "rewards/confidence_uniqueness_reward": 0.8675598978996277, "rewards/format_reward": 0.99921875, "rewards/frontier_aurc_reward": -0.0034637684002518654, "rewards/frontier_ece_reward": 0.020288588479161263, "rewards/frontier_entropy_batch_reward": -0.5450147032737732, "signal/accuracy_reward/centered_abs_mean": 0.14793701171875, "signal/accuracy_reward/group_std_mean": 0.18589730560779572, "signal/accuracy_reward/group_zero_std_frac": 0.50625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.073968505859375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.073968505859375, "signal/advantage_abs_mean": 0.1570839136838913, "signal/advantage_pre_scale_abs_mean": 0.1570839136838913, "signal/advantage_pre_scale_std": 0.22588954865932465, "signal/advantage_std": 0.22588954865932465, "signal/batch_coverage_0/centered_abs_mean": 0.19263360798358917, "signal/batch_coverage_0/group_std_mean": 0.2533486902713776, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.019263360276818277, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.019263360276818277, "signal/batch_coverage_1/centered_abs_mean": 0.19263360798358917, "signal/batch_coverage_1/group_std_mean": 0.2533486902713776, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.019263360276818277, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.019263360276818277, "signal/batch_coverage_10/centered_abs_mean": 0.20532942712306976, "signal/batch_coverage_10/group_std_mean": 0.26716126799583434, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.020532942935824396, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.020532942935824396, "signal/batch_coverage_15/centered_abs_mean": 0.20610760152339935, "signal/batch_coverage_15/group_std_mean": 0.26746096909046174, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.020610759779810906, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.020610759779810906, "signal/batch_coverage_20/centered_abs_mean": 0.20913188457489013, "signal/batch_coverage_20/group_std_mean": 0.2716536849737167, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.02091318890452385, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.02091318890452385, "signal/batch_coverage_25/centered_abs_mean": 0.2067886620759964, "signal/batch_coverage_25/group_std_mean": 0.2700066208839417, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.020678867399692536, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.020678867399692536, "signal/batch_coverage_5/centered_abs_mean": 0.19935680627822877, "signal/batch_coverage_5/group_std_mean": 0.2602789878845215, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.01993568167090416, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.01993568167090416, "signal/brier_reward/centered_abs_mean": 0.16666122376918793, "signal/brier_reward/group_std_mean": 0.21535793244838713, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016666123270988466, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.016666123270988466, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.08296736031770706, "signal/confidence_uniqueness_reward/group_std_mean": 0.10238117724657059, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.008296736143529415, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008296736143529415, "signal/format_reward/centered_abs_mean": 0.001513671875, "signal/format_reward/group_std_mean": 0.004419417260214687, "signal/format_reward/group_zero_std_frac": 0.975, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0007568359375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0007568359375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0029713909141719343, "signal/frontier_aurc_reward/group_std_mean": 0.004420665092766285, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.7142387009225784e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.7142387009225784e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.04267800748348236, "signal/frontier_ece_reward/group_std_mean": 0.060703708231449126, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004267800692468881, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004267800692468881, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.40921489596366883, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4904153048992157, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.003125, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.040921490639448166, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.040921490639448166, "step": 80 }, { "calibration/aurc": 0.4095207420497604, "calibration/batch_distribution_entropy": 0.7292892820522248, "calibration/buffer_distribution_entropy": 0.7714704851416797, "calibration/confidence_entropy": 0.24582674132048177, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.10450097847358122, "calibration/coverage@20%": 0.17142857142857143, "calibration/coverage@25%": 0.24810114970645794, "calibration/coverage@30%": 0.2965852800880626, "calibration/coverage@5%": 0.0, "calibration/ece": 0.14765774260001646, "calibration/mean_confidence": 0.4614050591762385, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00087890625, "completions/max_length": 556.0, "completions/max_terminated_length": 556.0, "completions/mean_length": 194.88779296875, "completions/mean_terminated_length": 195.06065979003907, "completions/min_length": 0.0, "completions/min_terminated_length": 80.4, "epoch": 0.272, "grad_norm": 0.002637905301526189, "learning_rate": 1e-06, "loss": -0.0004, "num_tokens": 284304451.0, "reward": 1.1642091512680053, "reward_std": 0.21782591938972473, "rewards/accuracy_reward": 0.40166015625, "rewards/batch_coverage_0": 0.45555404424667356, "rewards/batch_coverage_1": 0.45555404424667356, "rewards/batch_coverage_10": 0.5190391063690185, "rewards/batch_coverage_15": 0.5268309950828552, "rewards/batch_coverage_20": 0.5372787833213806, "rewards/batch_coverage_25": 0.5389063596725464, "rewards/batch_coverage_5": 0.4952739357948303, "rewards/brier_reward": 0.8077268123626709, "rewards/confidence_uniqueness_reward": 0.8441998481750488, "rewards/format_reward": 0.998828125, "rewards/frontier_aurc_reward": -0.003703146381303668, "rewards/frontier_ece_reward": 0.018940356373786927, "rewards/frontier_entropy_batch_reward": -0.5591914057731628, "signal/accuracy_reward/centered_abs_mean": 0.147406005859375, "signal/accuracy_reward/group_std_mean": 0.1893194407224655, "signal/accuracy_reward/group_zero_std_frac": 0.48125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0737030029296875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0737030029296875, "signal/advantage_abs_mean": 0.16743640899658202, "signal/advantage_pre_scale_abs_mean": 0.16743640899658202, "signal/advantage_pre_scale_std": 0.2411186933517456, "signal/advantage_std": 0.2411186933517456, "signal/batch_coverage_0/centered_abs_mean": 0.19905173778533936, "signal/batch_coverage_0/group_std_mean": 0.26072719097137453, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.019905174523591994, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.019905174523591994, "signal/batch_coverage_1/centered_abs_mean": 0.19905173778533936, "signal/batch_coverage_1/group_std_mean": 0.26072719097137453, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.019905174523591994, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.019905174523591994, "signal/batch_coverage_10/centered_abs_mean": 0.21653661429882048, "signal/batch_coverage_10/group_std_mean": 0.28195360898971555, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.021653661131858827, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.021653661131858827, "signal/batch_coverage_15/centered_abs_mean": 0.2188299685716629, "signal/batch_coverage_15/group_std_mean": 0.2830525070428848, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.021882996335625647, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.021882996335625647, "signal/batch_coverage_20/centered_abs_mean": 0.2214237332344055, "signal/batch_coverage_20/group_std_mean": 0.2871974349021912, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.022142373397946356, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.022142373397946356, "signal/batch_coverage_25/centered_abs_mean": 0.2215735673904419, "signal/batch_coverage_25/group_std_mean": 0.28793606758117674, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.022157356888055802, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.022157356888055802, "signal/batch_coverage_5/centered_abs_mean": 0.20815829634666444, "signal/batch_coverage_5/group_std_mean": 0.27133035063743594, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.020815829932689666, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.020815829932689666, "signal/brier_reward/centered_abs_mean": 0.17668550610542297, "signal/brier_reward/group_std_mean": 0.2282479226589203, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017668551579117776, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.017668551579117776, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.10292446613311768, "signal/confidence_uniqueness_reward/group_std_mean": 0.12634139955043794, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.010292447078973055, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.010292447078973055, "signal/format_reward/centered_abs_mean": 0.00225830078125, "signal/format_reward/group_std_mean": 0.0062928176019340755, "signal/format_reward/group_zero_std_frac": 0.965625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.001129150390625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.001129150390625, "signal/frontier_aurc_reward/centered_abs_mean": 0.003706426313146949, "signal/frontier_aurc_reward/group_std_mean": 0.005361904297024012, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.633032949641347e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.633032949641347e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.043428726494312286, "signal/frontier_ece_reward/group_std_mean": 0.06040780916810036, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004342872835695744, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004342872835695744, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.42182513475418093, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4972574055194855, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.00625, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0421825148165226, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0421825148165226, "step": 85 }, { "calibration/aurc": 0.38642271935922723, "calibration/batch_distribution_entropy": 0.7343121751663448, "calibration/buffer_distribution_entropy": 0.7698096977080258, "calibration/confidence_entropy": 0.24101454783337828, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.015655577299412915, "calibration/coverage@20%": 0.07475538160469666, "calibration/coverage@25%": 0.12211350293542074, "calibration/coverage@30%": 0.3546370474559687, "calibration/coverage@5%": 0.0, "calibration/ece": 0.15231339925359932, "calibration/mean_confidence": 0.5120636185686182, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00107421875, "completions/max_length": 774.6, "completions/max_terminated_length": 774.6, "completions/mean_length": 192.52734375, "completions/mean_terminated_length": 192.73372497558594, "completions/min_length": 16.8, "completions/min_terminated_length": 88.8, "epoch": 0.288, "grad_norm": 0.003030631458386779, "learning_rate": 1e-06, "loss": -0.0009, "num_tokens": 301234107.0, "reward": 1.1730293035507202, "reward_std": 0.21305195093154908, "rewards/accuracy_reward": 0.39794921875, "rewards/batch_coverage_0": 0.4744568645954132, "rewards/batch_coverage_1": 0.4744568645954132, "rewards/batch_coverage_10": 0.5303399085998535, "rewards/batch_coverage_15": 0.5415215492248535, "rewards/batch_coverage_20": 0.551158607006073, "rewards/batch_coverage_25": 0.5548080563545227, "rewards/batch_coverage_5": 0.5028053164482117, "rewards/brier_reward": 0.8083776473999024, "rewards/confidence_uniqueness_reward": 0.8495068907737732, "rewards/format_reward": 0.998828125, "rewards/frontier_aurc_reward": -0.00370310852304101, "rewards/frontier_ece_reward": 0.020712151564657687, "rewards/frontier_entropy_batch_reward": -0.561275064945221, "signal/accuracy_reward/centered_abs_mean": 0.138385009765625, "signal/accuracy_reward/group_std_mean": 0.1806561380624771, "signal/accuracy_reward/group_zero_std_frac": 0.496875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0691925048828125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0691925048828125, "signal/advantage_abs_mean": 0.16109004020690917, "signal/advantage_pre_scale_abs_mean": 0.16109004020690917, "signal/advantage_pre_scale_std": 0.2384302794933319, "signal/advantage_std": 0.2384302794933319, "signal/batch_coverage_0/centered_abs_mean": 0.18493348658084868, "signal/batch_coverage_0/group_std_mean": 0.24907850623130798, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.018493348360061647, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.018493348360061647, "signal/batch_coverage_1/centered_abs_mean": 0.18493348658084868, "signal/batch_coverage_1/group_std_mean": 0.24907850623130798, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.018493348360061647, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.018493348360061647, "signal/batch_coverage_10/centered_abs_mean": 0.19726809859275818, "signal/batch_coverage_10/group_std_mean": 0.26376489698886874, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.019726810976862907, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.019726810976862907, "signal/batch_coverage_15/centered_abs_mean": 0.20315858125686645, "signal/batch_coverage_15/group_std_mean": 0.27094233632087705, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.020315859094262124, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.020315859094262124, "signal/batch_coverage_20/centered_abs_mean": 0.21178129315376282, "signal/batch_coverage_20/group_std_mean": 0.28007384538650515, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.02117813006043434, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.02117813006043434, "signal/batch_coverage_25/centered_abs_mean": 0.21607309579849243, "signal/batch_coverage_25/group_std_mean": 0.2853268563747406, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.021607310324907304, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.021607310324907304, "signal/batch_coverage_5/centered_abs_mean": 0.19155743420124055, "signal/batch_coverage_5/group_std_mean": 0.2568057715892792, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.01915574409067631, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.01915574409067631, "signal/brier_reward/centered_abs_mean": 0.1633100241422653, "signal/brier_reward/group_std_mean": 0.21677840054035186, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016331002302467822, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.016331002302467822, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0984340637922287, "signal/confidence_uniqueness_reward/group_std_mean": 0.1197776436805725, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.009843406639993191, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.009843406639993191, "signal/format_reward/centered_abs_mean": 0.0022705078125, "signal/format_reward/group_std_mean": 0.006629125867038965, "signal/format_reward/group_zero_std_frac": 0.9625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00113525390625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00113525390625, "signal/frontier_aurc_reward/centered_abs_mean": 0.0035885621327906847, "signal/frontier_aurc_reward/group_std_mean": 0.005224420595914126, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.485702738747932e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.485702738747932e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.04839966595172882, "signal/frontier_ece_reward/group_std_mean": 0.07236144691705704, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0048399668186903, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0048399668186903, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.41489365696907043, "signal/frontier_entropy_batch_reward/group_std_mean": 0.49374261498451233, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.009375, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.041489367932081224, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.041489367932081224, "step": 90 }, { "calibration/aurc": 0.3088800902089267, "calibration/batch_distribution_entropy": 0.7244132146017391, "calibration/buffer_distribution_entropy": 0.7693529126591884, "calibration/confidence_entropy": 0.23922881391576922, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.14240230552837574, "calibration/coverage@15%": 0.23816734955968685, "calibration/coverage@20%": 0.2624151479941291, "calibration/coverage@25%": 0.3132254464285714, "calibration/coverage@30%": 0.5234031005381604, "calibration/coverage@5%": 0.05593658268101761, "calibration/ece": 0.13676851084610558, "calibration/mean_confidence": 0.5120746492478866, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00068359375, "completions/max_length": 617.4, "completions/max_terminated_length": 617.4, "completions/mean_length": 201.907421875, "completions/mean_terminated_length": 202.04771118164064, "completions/min_length": 17.4, "completions/min_terminated_length": 90.2, "epoch": 0.304, "grad_norm": 0.0020372753497213125, "learning_rate": 1e-06, "loss": 0.0008, "num_tokens": 318231591.0, "reward": 1.168937587738037, "reward_std": 0.21276797950267792, "rewards/accuracy_reward": 0.4037109375, "rewards/batch_coverage_0": 0.4556680262088776, "rewards/batch_coverage_1": 0.4556680262088776, "rewards/batch_coverage_10": 0.5263420939445496, "rewards/batch_coverage_15": 0.5296601533889771, "rewards/batch_coverage_20": 0.5361104488372803, "rewards/batch_coverage_25": 0.538708108663559, "rewards/batch_coverage_5": 0.5019454300403595, "rewards/brier_reward": 0.8099380731582642, "rewards/confidence_uniqueness_reward": 0.838314151763916, "rewards/format_reward": 0.99921875, "rewards/frontier_aurc_reward": -0.0033911903388798236, "rewards/frontier_ece_reward": 0.020074135437607766, "rewards/frontier_entropy_batch_reward": -0.5372773051261902, "signal/accuracy_reward/centered_abs_mean": 0.13790283203125, "signal/accuracy_reward/group_std_mean": 0.18555650413036345, "signal/accuracy_reward/group_zero_std_frac": 0.4625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.068951416015625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.068951416015625, "signal/advantage_abs_mean": 0.16150518357753754, "signal/advantage_pre_scale_abs_mean": 0.16150518357753754, "signal/advantage_pre_scale_std": 0.23679069578647613, "signal/advantage_std": 0.23679069578647613, "signal/batch_coverage_0/centered_abs_mean": 0.19015144407749177, "signal/batch_coverage_0/group_std_mean": 0.24815911054611206, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.01901514418423176, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.01901514418423176, "signal/batch_coverage_1/centered_abs_mean": 0.19015144407749177, "signal/batch_coverage_1/group_std_mean": 0.24815911054611206, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.01901514418423176, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.01901514418423176, "signal/batch_coverage_10/centered_abs_mean": 0.21041842699050903, "signal/batch_coverage_10/group_std_mean": 0.2728098422288895, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.02104184255003929, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.02104184255003929, "signal/batch_coverage_15/centered_abs_mean": 0.20971550941467285, "signal/batch_coverage_15/group_std_mean": 0.27206642627716066, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.020971550419926644, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.020971550419926644, "signal/batch_coverage_20/centered_abs_mean": 0.21126347482204438, "signal/batch_coverage_20/group_std_mean": 0.2744140148162842, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.021126347407698632, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.021126347407698632, "signal/batch_coverage_25/centered_abs_mean": 0.21306751370429994, "signal/batch_coverage_25/group_std_mean": 0.27628334164619445, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.021306751295924187, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.021306751295924187, "signal/batch_coverage_5/centered_abs_mean": 0.2033386319875717, "signal/batch_coverage_5/group_std_mean": 0.2633496135473251, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.020333864167332648, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.020333864167332648, "signal/brier_reward/centered_abs_mean": 0.1642611026763916, "signal/brier_reward/group_std_mean": 0.21632223427295685, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01642611101269722, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01642611101269722, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.10172502249479294, "signal/confidence_uniqueness_reward/group_std_mean": 0.1260932356119156, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.010172502510249615, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.010172502510249615, "signal/format_reward/centered_abs_mean": 0.001513671875, "signal/format_reward/group_std_mean": 0.004419417260214687, "signal/format_reward/group_zero_std_frac": 0.975, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0007568359375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0007568359375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0029861139133572578, "signal/frontier_aurc_reward/group_std_mean": 0.00451540406793356, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.7326425808714705e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.7326425808714705e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.043954838812351224, "signal/frontier_ece_reward/group_std_mean": 0.06721483990550041, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004395484086126089, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004395484086126089, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.4110023260116577, "signal/frontier_entropy_batch_reward/group_std_mean": 0.48685582876205447, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.041100232303142546, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.041100232303142546, "step": 95 }, { "calibration/aurc": 0.31066769725534477, "calibration/batch_distribution_entropy": 0.7408097644771414, "calibration/buffer_distribution_entropy": 0.7695192210995658, "calibration/confidence_entropy": 0.24489852964300565, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.101171875, "calibration/coverage@15%": 0.3365475171232877, "calibration/coverage@20%": 0.3885350415851272, "calibration/coverage@25%": 0.43192346501956946, "calibration/coverage@30%": 0.4780447345890411, "calibration/coverage@5%": 0.0, "calibration/ece": 0.16255623477764822, "calibration/mean_confidence": 0.5092909846624645, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00087890625, "completions/max_length": 623.0, "completions/max_terminated_length": 623.0, "completions/mean_length": 203.51396484375, "completions/mean_terminated_length": 203.69292907714845, "completions/min_length": 16.8, "completions/min_terminated_length": 83.4, "epoch": 0.32, "grad_norm": 0.001760556478984654, "learning_rate": 1e-06, "loss": 0.0005, "num_tokens": 335404278.0, "reward": 1.2145358324050903, "reward_std": 0.20452672243118286, "rewards/accuracy_reward": 0.44296875, "rewards/batch_coverage_0": 0.49820048213005064, "rewards/batch_coverage_1": 0.49820048213005064, "rewards/batch_coverage_10": 0.556191599369049, "rewards/batch_coverage_15": 0.5629641294479371, "rewards/batch_coverage_20": 0.5709343194961548, "rewards/batch_coverage_25": 0.5740790128707886, "rewards/batch_coverage_5": 0.5346252918243408, "rewards/brier_reward": 0.810669207572937, "rewards/confidence_uniqueness_reward": 0.8535032391548156, "rewards/format_reward": 0.99873046875, "rewards/frontier_aurc_reward": -0.0030829327646642924, "rewards/frontier_ece_reward": 0.027932414039969444, "rewards/frontier_entropy_batch_reward": -0.5500531315803527, "signal/accuracy_reward/centered_abs_mean": 0.11822509765625, "signal/accuracy_reward/group_std_mean": 0.15758318901062013, "signal/accuracy_reward/group_zero_std_frac": 0.546875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.059112548828125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.059112548828125, "signal/advantage_abs_mean": 0.15452314913272858, "signal/advantage_pre_scale_abs_mean": 0.15452314913272858, "signal/advantage_pre_scale_std": 0.23202507495880126, "signal/advantage_std": 0.23202507495880126, "signal/batch_coverage_0/centered_abs_mean": 0.17627234160900115, "signal/batch_coverage_0/group_std_mean": 0.23309228122234343, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.017627234011888503, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.017627234011888503, "signal/batch_coverage_1/centered_abs_mean": 0.17627234160900115, "signal/batch_coverage_1/group_std_mean": 0.23309228122234343, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.017627234011888503, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.017627234011888503, "signal/batch_coverage_10/centered_abs_mean": 0.19112071096897126, "signal/batch_coverage_10/group_std_mean": 0.2518125683069229, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.01911207064986229, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.01911207064986229, "signal/batch_coverage_15/centered_abs_mean": 0.19524294137954712, "signal/batch_coverage_15/group_std_mean": 0.2568311929702759, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.019524294510483743, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.019524294510483743, "signal/batch_coverage_20/centered_abs_mean": 0.19963866770267485, "signal/batch_coverage_20/group_std_mean": 0.2630015403032303, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.019963867589831354, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.019963867589831354, "signal/batch_coverage_25/centered_abs_mean": 0.2009682387113571, "signal/batch_coverage_25/group_std_mean": 0.2646039962768555, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.020096823945641516, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.020096823945641516, "signal/batch_coverage_5/centered_abs_mean": 0.18442264795303345, "signal/batch_coverage_5/group_std_mean": 0.2439052492380142, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.018442264944314956, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.018442264944314956, "signal/brier_reward/centered_abs_mean": 0.16162406504154206, "signal/brier_reward/group_std_mean": 0.2116384744644165, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01616240683943033, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01616240683943033, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.09289654493331909, "signal/confidence_uniqueness_reward/group_std_mean": 0.11365949809551239, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.009289654716849328, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.009289654716849328, "signal/format_reward/centered_abs_mean": 0.002447509765625, "signal/format_reward/group_std_mean": 0.006845244579017163, "signal/format_reward/group_zero_std_frac": 0.9625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0012237548828125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0012237548828125, "signal/frontier_aurc_reward/centered_abs_mean": 0.0030647614039480686, "signal/frontier_aurc_reward/group_std_mean": 0.004521076194941997, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.830951754935086e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.830951754935086e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.04688786193728447, "signal/frontier_ece_reward/group_std_mean": 0.07207571268081665, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004688786249607802, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004688786249607802, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.40426061153411863, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4829357087612152, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.00625, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.040426061302423474, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.040426061302423474, "step": 100 }, { "epoch": 0.32, "eval_calibration/aurc": 0.569440617189013, "eval_calibration/batch_distribution_entropy": 0.6703147758360166, "eval_calibration/buffer_distribution_entropy": 0.7686206731225668, "eval_calibration/confidence_entropy": 0.23202965696476052, "eval_calibration/coverage@0%": 0.0, "eval_calibration/coverage@1%": 0.0, "eval_calibration/coverage@10%": 0.0, "eval_calibration/coverage@15%": 0.0703125, "eval_calibration/coverage@20%": 0.078125, "eval_calibration/coverage@25%": 0.09375, "eval_calibration/coverage@30%": 0.203125, "eval_calibration/coverage@5%": 0.0, "eval_calibration/ece": 0.2418025333544978, "eval_calibration/mean_confidence": 0.43286503335449783, "eval_completions/clipped_ratio": 0.001953125, "eval_completions/max_length": 510.25, "eval_completions/max_terminated_length": 510.25, "eval_completions/mean_length": 205.94585037231445, "eval_completions/mean_terminated_length": 206.36303329467773, "eval_completions/min_length": 82.5, "eval_completions/min_terminated_length": 107.25, "eval_loss": 0.0, "eval_num_tokens": 335404278.0, "eval_reward": 0.9630002677440643, "eval_reward_std": 0.27110686898231506, "eval_rewards/accuracy_reward": 0.33984375, "eval_rewards/batch_coverage_0": 0.35313040763139725, "eval_rewards/batch_coverage_1": 0.35313040763139725, "eval_rewards/batch_coverage_10": 0.35056068003177643, "eval_rewards/batch_coverage_15": 0.32362698018550873, "eval_rewards/batch_coverage_20": 0.3047519400715828, "eval_rewards/batch_coverage_25": 0.2832332253456116, "eval_rewards/batch_coverage_5": 0.35313040763139725, "eval_rewards/brier_reward": 0.7975546419620514, "eval_rewards/confidence_uniqueness_reward": 0.8064673691987991, "eval_rewards/format_reward": 0.998046875, "eval_rewards/frontier_aurc_reward": -0.00390950427390635, "eval_rewards/frontier_ece_reward": 0.013499133347067982, "eval_rewards/frontier_entropy_batch_reward": -0.998046875, "eval_runtime": 30.6636, "eval_samples_per_second": 16.306, "eval_signal/accuracy_reward/centered_abs_mean": 0.440185546875, "eval_signal/accuracy_reward/group_std_mean": 0.47584959864616394, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2200927734375, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2200927734375, "eval_signal/advantage_abs_mean": 0.22441067546606064, "eval_signal/advantage_pre_scale_abs_mean": 0.22441067546606064, "eval_signal/advantage_pre_scale_std": 0.269348181784153, "eval_signal/advantage_std": 0.269348181784153, "eval_signal/batch_coverage_0/centered_abs_mean": 0.4600137919187546, "eval_signal/batch_coverage_0/group_std_mean": 0.5194766819477081, "eval_signal/batch_coverage_0/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.04600138030946255, "eval_signal/batch_coverage_0/weight": 0.10000000149011612, "eval_signal/batch_coverage_0/weighted_centered_abs_mean": 0.04600138030946255, "eval_signal/batch_coverage_1/centered_abs_mean": 0.4600137919187546, "eval_signal/batch_coverage_1/group_std_mean": 0.5194766819477081, "eval_signal/batch_coverage_1/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.04600138030946255, "eval_signal/batch_coverage_1/weight": 0.10000000149011612, "eval_signal/batch_coverage_1/weighted_centered_abs_mean": 0.04600138030946255, "eval_signal/batch_coverage_10/centered_abs_mean": 0.457081101834774, "eval_signal/batch_coverage_10/group_std_mean": 0.5163652151823044, "eval_signal/batch_coverage_10/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.045708111487329006, "eval_signal/batch_coverage_10/weight": 0.10000000149011612, "eval_signal/batch_coverage_10/weighted_centered_abs_mean": 0.045708111487329006, "eval_signal/batch_coverage_15/centered_abs_mean": 0.41683706641197205, "eval_signal/batch_coverage_15/group_std_mean": 0.4740506485104561, "eval_signal/batch_coverage_15/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.04168370831757784, "eval_signal/batch_coverage_15/weight": 0.10000000149011612, "eval_signal/batch_coverage_15/weighted_centered_abs_mean": 0.04168370831757784, "eval_signal/batch_coverage_20/centered_abs_mean": 0.39310214668512344, "eval_signal/batch_coverage_20/group_std_mean": 0.4481876716017723, "eval_signal/batch_coverage_20/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.03931021690368652, "eval_signal/batch_coverage_20/weight": 0.10000000149011612, "eval_signal/batch_coverage_20/weighted_centered_abs_mean": 0.03931021690368652, "eval_signal/batch_coverage_25/centered_abs_mean": 0.369586318731308, "eval_signal/batch_coverage_25/group_std_mean": 0.4246791750192642, "eval_signal/batch_coverage_25/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.03695863112807274, "eval_signal/batch_coverage_25/weight": 0.10000000149011612, "eval_signal/batch_coverage_25/weighted_centered_abs_mean": 0.03695863112807274, "eval_signal/batch_coverage_5/centered_abs_mean": 0.4600137919187546, "eval_signal/batch_coverage_5/group_std_mean": 0.5194766819477081, "eval_signal/batch_coverage_5/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.04600138030946255, "eval_signal/batch_coverage_5/weight": 0.10000000149011612, "eval_signal/batch_coverage_5/weighted_centered_abs_mean": 0.04600138030946255, "eval_signal/brier_reward/centered_abs_mean": 0.26220837235450745, "eval_signal/brier_reward/group_std_mean": 0.3284427151083946, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.026220838073641062, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.026220838073641062, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.13375214487314224, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.15917536616325378, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.013375215232372284, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.013375215232372284, "eval_signal/format_reward/centered_abs_mean": 0.0037841796875, "eval_signal/format_reward/group_std_mean": 0.011048543266952038, "eval_signal/format_reward/group_zero_std_frac": 0.9375, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.00189208984375, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.00189208984375, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0043173496960662305, "eval_signal/frontier_aurc_reward/group_std_mean": 0.006625176058150828, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.3966873565514106e-05, "eval_signal/frontier_aurc_reward/weight": 0.012500000186264515, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.3966873565514106e-05, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.05632109194993973, "eval_signal/frontier_ece_reward/group_std_mean": 0.0933114867657423, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00563210912514478, "eval_signal/frontier_ece_reward/weight": 0.10000000149011612, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00563210912514478, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0037841796875, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.011048543266952038, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.9375, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0003784179862122983, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0003784179862122983, "eval_steps_per_second": 0.13, "step": 100 }, { "calibration/aurc": 0.35455243453590024, "calibration/batch_distribution_entropy": 0.7786134085679792, "calibration/buffer_distribution_entropy": 0.768577979834517, "calibration/confidence_entropy": 0.2686558213720398, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.07123287671232877, "calibration/coverage@15%": 0.08180039138943249, "calibration/coverage@20%": 0.09041095890410958, "calibration/coverage@25%": 0.21627181780246346, "calibration/coverage@30%": 0.43795605142742033, "calibration/coverage@5%": 0.020743639921722113, "calibration/ece": 0.16696759740323017, "calibration/mean_confidence": 0.45578866359148174, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00078125, "completions/max_length": 541.4, "completions/max_terminated_length": 541.4, "completions/mean_length": 206.63544921875, "completions/mean_terminated_length": 206.79442749023437, "completions/min_length": 53.0, "completions/min_terminated_length": 89.2, "epoch": 0.336, "grad_norm": 0.0022354216780513525, "learning_rate": 1e-06, "loss": 0.0001, "num_tokens": 352242657.0, "reward": 1.192155623435974, "reward_std": 0.19818204939365386, "rewards/accuracy_reward": 0.45048828125, "rewards/batch_coverage_0": 0.4518804371356964, "rewards/batch_coverage_1": 0.4518804371356964, "rewards/batch_coverage_10": 0.5132352769374847, "rewards/batch_coverage_15": 0.5222086906433105, "rewards/batch_coverage_20": 0.5237145602703095, "rewards/batch_coverage_25": 0.5243976712226868, "rewards/batch_coverage_5": 0.4776782333850861, "rewards/brier_reward": 0.7963641285896301, "rewards/confidence_uniqueness_reward": 0.8801257133483886, "rewards/format_reward": 0.99912109375, "rewards/frontier_aurc_reward": -0.003094083955511451, "rewards/frontier_ece_reward": 0.024742235243320466, "rewards/frontier_entropy_batch_reward": -0.4923313021659851, "signal/accuracy_reward/centered_abs_mean": 0.129144287109375, "signal/accuracy_reward/group_std_mean": 0.17095574140548705, "signal/accuracy_reward/group_zero_std_frac": 0.5125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0645721435546875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0645721435546875, "signal/advantage_abs_mean": 0.15149070620536803, "signal/advantage_pre_scale_abs_mean": 0.15149070620536803, "signal/advantage_pre_scale_std": 0.21988919079303743, "signal/advantage_std": 0.21988919079303743, "signal/batch_coverage_0/centered_abs_mean": 0.18348737359046935, "signal/batch_coverage_0/group_std_mean": 0.24079204499721527, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.018348737433552743, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.018348737433552743, "signal/batch_coverage_1/centered_abs_mean": 0.18348737359046935, "signal/batch_coverage_1/group_std_mean": 0.24079204499721527, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.018348737433552743, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.018348737433552743, "signal/batch_coverage_10/centered_abs_mean": 0.19880570769309996, "signal/batch_coverage_10/group_std_mean": 0.26094971001148226, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.019880571216344834, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.019880571216344834, "signal/batch_coverage_15/centered_abs_mean": 0.20224025547504426, "signal/batch_coverage_15/group_std_mean": 0.26526222229003904, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.02022402621805668, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.02022402621805668, "signal/batch_coverage_20/centered_abs_mean": 0.20068714618682862, "signal/batch_coverage_20/group_std_mean": 0.2642544090747833, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.020068715140223504, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.020068715140223504, "signal/batch_coverage_25/centered_abs_mean": 0.20156602263450624, "signal/batch_coverage_25/group_std_mean": 0.26549296975135805, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.020156602561473846, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.020156602561473846, "signal/batch_coverage_5/centered_abs_mean": 0.19100910425186157, "signal/batch_coverage_5/group_std_mean": 0.24971230030059816, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.019100910797715188, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.019100910797715188, "signal/brier_reward/centered_abs_mean": 0.16946334838867189, "signal/brier_reward/group_std_mean": 0.2165478616952896, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016946335881948472, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.016946335881948472, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.07198369279503822, "signal/confidence_uniqueness_reward/group_std_mean": 0.09141376763582229, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0071983693167567255, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0071983693167567255, "signal/format_reward/centered_abs_mean": 0.001702880859375, "signal/format_reward/group_std_mean": 0.004971844330430031, "signal/format_reward/group_zero_std_frac": 0.971875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0008514404296875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0008514404296875, "signal/frontier_aurc_reward/centered_abs_mean": 0.003002920467406511, "signal/frontier_aurc_reward/group_std_mean": 0.004374483227729797, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.7536507443292065e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.7536507443292065e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.04616122543811798, "signal/frontier_ece_reward/group_std_mean": 0.07393098026514053, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004616122413426638, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004616122413426638, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.391942685842514, "signal/frontier_entropy_batch_reward/group_std_mean": 0.46693851351737975, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.015625, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03919426798820495, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03919426798820495, "step": 105 }, { "calibration/aurc": 0.35680028964650196, "calibration/batch_distribution_entropy": 0.7071003218034573, "calibration/buffer_distribution_entropy": 0.7643713160726849, "calibration/confidence_entropy": 0.23301250797379686, "calibration/coverage@0%": 0.025415851272015656, "calibration/coverage@1%": 0.025415851272015656, "calibration/coverage@10%": 0.1735047700587084, "calibration/coverage@15%": 0.21611041462818004, "calibration/coverage@20%": 0.3301866744129159, "calibration/coverage@25%": 0.4040285591976517, "calibration/coverage@30%": 0.4364657228473581, "calibration/coverage@5%": 0.10628439946183951, "calibration/ece": 0.12169558641514251, "calibration/mean_confidence": 0.3869068004266283, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00078125, "completions/max_length": 714.0, "completions/max_terminated_length": 714.0, "completions/mean_length": 204.9240234375, "completions/mean_terminated_length": 205.08332214355468, "completions/min_length": 0.0, "completions/min_terminated_length": 93.0, "epoch": 0.352, "grad_norm": 0.0022575741168111563, "learning_rate": 1e-06, "loss": -0.0007, "num_tokens": 369601495.0, "reward": 1.1860121488571167, "reward_std": 0.1875196486711502, "rewards/accuracy_reward": 0.4072265625, "rewards/batch_coverage_0": 0.482658326625824, "rewards/batch_coverage_1": 0.482658326625824, "rewards/batch_coverage_10": 0.5309643864631652, "rewards/batch_coverage_15": 0.5396885633468628, "rewards/batch_coverage_20": 0.5477299809455871, "rewards/batch_coverage_25": 0.5511409044265747, "rewards/batch_coverage_5": 0.5098637223243714, "rewards/brier_reward": 0.8044361114501953, "rewards/confidence_uniqueness_reward": 0.8674160838127136, "rewards/format_reward": 0.99912109375, "rewards/frontier_aurc_reward": -0.0032843008171766996, "rewards/frontier_ece_reward": 0.020676460489630698, "rewards/frontier_entropy_batch_reward": -0.5084396362304687, "signal/accuracy_reward/centered_abs_mean": 0.11280517578125, "signal/accuracy_reward/group_std_mean": 0.14706921875476836, "signal/accuracy_reward/group_zero_std_frac": 0.5875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.056402587890625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.056402587890625, "signal/advantage_abs_mean": 0.14266299903392793, "signal/advantage_pre_scale_abs_mean": 0.14266299903392793, "signal/advantage_pre_scale_std": 0.21263113021850585, "signal/advantage_std": 0.21263113021850585, "signal/batch_coverage_0/centered_abs_mean": 0.1743027001619339, "signal/batch_coverage_0/group_std_mean": 0.23004747033119202, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.017430270463228224, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.017430270463228224, "signal/batch_coverage_1/centered_abs_mean": 0.1743027001619339, "signal/batch_coverage_1/group_std_mean": 0.23004747033119202, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.017430270463228224, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.017430270463228224, "signal/batch_coverage_10/centered_abs_mean": 0.18721598982810975, "signal/batch_coverage_10/group_std_mean": 0.24640632271766663, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.018721599504351615, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.018721599504351615, "signal/batch_coverage_15/centered_abs_mean": 0.18796933591365814, "signal/batch_coverage_15/group_std_mean": 0.24771904051303864, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.01879693418741226, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.01879693418741226, "signal/batch_coverage_20/centered_abs_mean": 0.1884627491235733, "signal/batch_coverage_20/group_std_mean": 0.24858099222183228, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.01884627602994442, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.01884627602994442, "signal/batch_coverage_25/centered_abs_mean": 0.192040291428566, "signal/batch_coverage_25/group_std_mean": 0.25262742638587954, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.01920402981340885, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.01920402981340885, "signal/batch_coverage_5/centered_abs_mean": 0.18090624809265138, "signal/batch_coverage_5/group_std_mean": 0.2380412310361862, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.01809062510728836, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.01809062510728836, "signal/brier_reward/centered_abs_mean": 0.15644164383411407, "signal/brier_reward/group_std_mean": 0.20135876834392546, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015644165128469466, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.015644165128469466, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.08378114998340606, "signal/confidence_uniqueness_reward/group_std_mean": 0.10465183854103088, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.008378114923834801, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008378114923834801, "signal/format_reward/centered_abs_mean": 0.001690673828125, "signal/format_reward/group_std_mean": 0.004635535972192883, "signal/format_reward/group_zero_std_frac": 0.975, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0008453369140625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0008453369140625, "signal/frontier_aurc_reward/centered_abs_mean": 0.0031608616933226585, "signal/frontier_aurc_reward/group_std_mean": 0.004724891297519207, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.951077233068645e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.951077233068645e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.03993298932909965, "signal/frontier_ece_reward/group_std_mean": 0.06318138912320137, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.003993298951536417, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.003993298951536417, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3889405906200409, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4657216012477875, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.015625, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03889405876398087, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03889405876398087, "step": 110 }, { "calibration/aurc": 0.42205895128309256, "calibration/batch_distribution_entropy": 0.7692756274501231, "calibration/buffer_distribution_entropy": 0.7559267013034778, "calibration/confidence_entropy": 0.2641741483942744, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.051953125, "calibration/coverage@15%": 0.092578125, "calibration/coverage@20%": 0.245703125, "calibration/coverage@25%": 0.291796875, "calibration/coverage@30%": 0.32421875, "calibration/coverage@5%": 0.0, "calibration/ece": 0.16944534321662208, "calibration/mean_confidence": 0.48004563898771097, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0009765625, "completions/max_length": 604.4, "completions/max_terminated_length": 604.4, "completions/mean_length": 200.35048828125, "completions/mean_terminated_length": 200.54432678222656, "completions/min_length": 35.4, "completions/min_terminated_length": 88.6, "epoch": 0.368, "grad_norm": 0.001792994444258511, "learning_rate": 1e-06, "loss": -0.0006, "num_tokens": 386718556.0, "reward": 1.2112500190734863, "reward_std": 0.187380114197731, "rewards/accuracy_reward": 0.4390625, "rewards/batch_coverage_0": 0.4950934827327728, "rewards/batch_coverage_1": 0.4950934827327728, "rewards/batch_coverage_10": 0.5496911406517029, "rewards/batch_coverage_15": 0.5525175213813782, "rewards/batch_coverage_20": 0.5536886215209961, "rewards/batch_coverage_25": 0.5523674130439759, "rewards/batch_coverage_5": 0.5226927876472474, "rewards/brier_reward": 0.7964340925216675, "rewards/confidence_uniqueness_reward": 0.8855133891105652, "rewards/format_reward": 0.998828125, "rewards/frontier_aurc_reward": -0.0035252573899924753, "rewards/frontier_ece_reward": 0.020503921248018742, "rewards/frontier_entropy_batch_reward": -0.5001079976558686, "signal/accuracy_reward/centered_abs_mean": 0.1037353515625, "signal/accuracy_reward/group_std_mean": 0.13716953694820405, "signal/accuracy_reward/group_zero_std_frac": 0.609375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05186767578125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05186767578125, "signal/advantage_abs_mean": 0.14186038076877594, "signal/advantage_pre_scale_abs_mean": 0.14186038076877594, "signal/advantage_pre_scale_std": 0.21083785593509674, "signal/advantage_std": 0.21083785593509674, "signal/batch_coverage_0/centered_abs_mean": 0.18369400203227998, "signal/batch_coverage_0/group_std_mean": 0.23681255280971528, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.018369400501251222, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.018369400501251222, "signal/batch_coverage_1/centered_abs_mean": 0.18369400203227998, "signal/batch_coverage_1/group_std_mean": 0.23681255280971528, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.018369400501251222, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.018369400501251222, "signal/batch_coverage_10/centered_abs_mean": 0.195779687166214, "signal/batch_coverage_10/group_std_mean": 0.2538564056158066, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.019577968493103982, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.019577968493103982, "signal/batch_coverage_15/centered_abs_mean": 0.1936182737350464, "signal/batch_coverage_15/group_std_mean": 0.25214785933494566, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.01936182826757431, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.01936182826757431, "signal/batch_coverage_20/centered_abs_mean": 0.194045352935791, "signal/batch_coverage_20/group_std_mean": 0.25312704145908355, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.019404535368084907, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.019404535368084907, "signal/batch_coverage_25/centered_abs_mean": 0.18496437072753907, "signal/batch_coverage_25/group_std_mean": 0.2431274473667145, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.01849643774330616, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.01849643774330616, "signal/batch_coverage_5/centered_abs_mean": 0.19195379316806793, "signal/batch_coverage_5/group_std_mean": 0.24739271104335786, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.019195379316806795, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.019195379316806795, "signal/brier_reward/centered_abs_mean": 0.15315645933151245, "signal/brier_reward/group_std_mean": 0.19987372756004335, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01531564611941576, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01531564611941576, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.07148051410913467, "signal/confidence_uniqueness_reward/group_std_mean": 0.08934253603219985, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00714805144816637, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00714805144816637, "signal/format_reward/centered_abs_mean": 0.0022216796875, "signal/format_reward/group_std_mean": 0.005560987815260887, "signal/format_reward/group_zero_std_frac": 0.971875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00111083984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00111083984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0036128945648670197, "signal/frontier_aurc_reward/group_std_mean": 0.005264179687947035, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.516118278843351e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.516118278843351e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.03991732746362686, "signal/frontier_ece_reward/group_std_mean": 0.0683354414999485, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00399173297919333, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00399173297919333, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3917108952999115, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4644228458404541, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.00625, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03917108997702599, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03917108997702599, "step": 115 }, { "calibration/aurc": 0.3312790981825941, "calibration/batch_distribution_entropy": 0.7078424659451629, "calibration/buffer_distribution_entropy": 0.746863848902535, "calibration/confidence_entropy": 0.2260585576901173, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.20234375, "calibration/coverage@15%": 0.257421875, "calibration/coverage@20%": 0.294921875, "calibration/coverage@25%": 0.3265625, "calibration/coverage@30%": 0.4231730063600782, "calibration/coverage@5%": 0.094140625, "calibration/ece": 0.13303364661015177, "calibration/mean_confidence": 0.4431268148201619, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 626.8, "completions/max_terminated_length": 626.8, "completions/mean_length": 198.51220703125, "completions/mean_terminated_length": 198.53143615722655, "completions/min_length": 70.2, "completions/min_terminated_length": 90.0, "epoch": 0.384, "grad_norm": 0.0019350156653672457, "learning_rate": 1e-06, "loss": 0.0009, "num_tokens": 403607833.0, "reward": 1.2347772121429443, "reward_std": 0.1755542814731598, "rewards/accuracy_reward": 0.47451171875, "rewards/batch_coverage_0": 0.486088764667511, "rewards/batch_coverage_1": 0.486088764667511, "rewards/batch_coverage_10": 0.5594327092170716, "rewards/batch_coverage_15": 0.5647305846214294, "rewards/batch_coverage_20": 0.5726596713066101, "rewards/batch_coverage_25": 0.5790935397148133, "rewards/batch_coverage_5": 0.5290896475315094, "rewards/brier_reward": 0.8075304627418518, "rewards/confidence_uniqueness_reward": 0.8788026452064515, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.0032432018779218197, "rewards/frontier_ece_reward": 0.02107427977025509, "rewards/frontier_entropy_batch_reward": -0.5079957664012908, "signal/accuracy_reward/centered_abs_mean": 0.102032470703125, "signal/accuracy_reward/group_std_mean": 0.14291528165340422, "signal/accuracy_reward/group_zero_std_frac": 0.559375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0510162353515625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0510162353515625, "signal/advantage_abs_mean": 0.13100714832544327, "signal/advantage_pre_scale_abs_mean": 0.13100714832544327, "signal/advantage_pre_scale_std": 0.20000562965869903, "signal/advantage_std": 0.20000562965869903, "signal/batch_coverage_0/centered_abs_mean": 0.1647425413131714, "signal/batch_coverage_0/group_std_mean": 0.2197166472673416, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.016474254056811334, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.016474254056811334, "signal/batch_coverage_1/centered_abs_mean": 0.1647425413131714, "signal/batch_coverage_1/group_std_mean": 0.2197166472673416, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.016474254056811334, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.016474254056811334, "signal/batch_coverage_10/centered_abs_mean": 0.1836285799741745, "signal/batch_coverage_10/group_std_mean": 0.2453942656517029, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.018362860009074212, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.018362860009074212, "signal/batch_coverage_15/centered_abs_mean": 0.18449074327945708, "signal/batch_coverage_15/group_std_mean": 0.24565376341342926, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.018449075147509576, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.018449075147509576, "signal/batch_coverage_20/centered_abs_mean": 0.18724102675914764, "signal/batch_coverage_20/group_std_mean": 0.24988237917423248, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.018724103271961213, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.018724103271961213, "signal/batch_coverage_25/centered_abs_mean": 0.18788727819919587, "signal/batch_coverage_25/group_std_mean": 0.25117466747760775, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.01878872811794281, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.01878872811794281, "signal/batch_coverage_5/centered_abs_mean": 0.17459736466407777, "signal/batch_coverage_5/group_std_mean": 0.23254739940166474, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.017459736764431, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.017459736764431, "signal/brier_reward/centered_abs_mean": 0.1415181964635849, "signal/brier_reward/group_std_mean": 0.18468352258205414, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014151819795370103, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014151819795370103, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.07397404685616493, "signal/confidence_uniqueness_reward/group_std_mean": 0.09191499650478363, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007397404778748751, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007397404778748751, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.002988464618101716, "signal/frontier_aurc_reward/group_std_mean": 0.004501758888363838, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.735580867214594e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.735580867214594e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.03291532322764397, "signal/frontier_ece_reward/group_std_mean": 0.056967698782682416, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0032915322575718165, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0032915322575718165, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3701502561569214, "signal/frontier_entropy_batch_reward/group_std_mean": 0.45215981006622313, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.00625, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0370150275528431, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0370150275528431, "step": 120 }, { "calibration/aurc": 0.42148780620656484, "calibration/batch_distribution_entropy": 0.7692129995349444, "calibration/buffer_distribution_entropy": 0.7382113920548676, "calibration/confidence_entropy": 0.26247756330596594, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.16482077205882353, "calibration/coverage@30%": 0.20041819852941173, "calibration/coverage@5%": 0.0, "calibration/ece": 0.19796225418204746, "calibration/mean_confidence": 0.48151096731969645, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000390625, "completions/max_length": 497.8, "completions/max_terminated_length": 497.8, "completions/mean_length": 196.94169921875, "completions/mean_terminated_length": 197.02088928222656, "completions/min_length": 77.2, "completions/min_terminated_length": 95.8, "epoch": 0.4, "grad_norm": 0.0016334950923919678, "learning_rate": 1e-06, "loss": 0.0011, "num_tokens": 420660964.0, "reward": 1.2030918598175049, "reward_std": 0.1932498872280121, "rewards/accuracy_reward": 0.45927734375, "rewards/batch_coverage_0": 0.4648968815803528, "rewards/batch_coverage_1": 0.4648968815803528, "rewards/batch_coverage_10": 0.5263925135135651, "rewards/batch_coverage_15": 0.538670289516449, "rewards/batch_coverage_20": 0.5505793452262878, "rewards/batch_coverage_25": 0.5528130173683167, "rewards/batch_coverage_5": 0.4986697494983673, "rewards/brier_reward": 0.7922908544540406, "rewards/confidence_uniqueness_reward": 0.8596806645393371, "rewards/format_reward": 0.999609375, "rewards/frontier_aurc_reward": -0.003926975373178721, "rewards/frontier_ece_reward": 0.01958938278257847, "rewards/frontier_entropy_batch_reward": -0.531503701210022, "signal/accuracy_reward/centered_abs_mean": 0.116705322265625, "signal/accuracy_reward/group_std_mean": 0.15439043641090394, "signal/accuracy_reward/group_zero_std_frac": 0.55625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0583526611328125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0583526611328125, "signal/advantage_abs_mean": 0.14729376435279845, "signal/advantage_pre_scale_abs_mean": 0.14729376435279845, "signal/advantage_pre_scale_std": 0.21888698935508727, "signal/advantage_std": 0.21888698935508727, "signal/batch_coverage_0/centered_abs_mean": 0.1779138207435608, "signal/batch_coverage_0/group_std_mean": 0.23396467566490173, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.017791382595896722, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.017791382595896722, "signal/batch_coverage_1/centered_abs_mean": 0.1779138207435608, "signal/batch_coverage_1/group_std_mean": 0.23396467566490173, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.017791382595896722, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.017791382595896722, "signal/batch_coverage_10/centered_abs_mean": 0.19597465097904204, "signal/batch_coverage_10/group_std_mean": 0.2574351608753204, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.019597466662526132, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.019597466662526132, "signal/batch_coverage_15/centered_abs_mean": 0.1948534607887268, "signal/batch_coverage_15/group_std_mean": 0.25582134127616885, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.019485345855355263, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.019485345855355263, "signal/batch_coverage_20/centered_abs_mean": 0.20348668098449707, "signal/batch_coverage_20/group_std_mean": 0.26646647453308103, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.020348669216036796, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.020348669216036796, "signal/batch_coverage_25/centered_abs_mean": 0.20679988861083984, "signal/batch_coverage_25/group_std_mean": 0.2701643168926239, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.020679988712072373, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.020679988712072373, "signal/batch_coverage_5/centered_abs_mean": 0.18674066066741943, "signal/batch_coverage_5/group_std_mean": 0.24514356553554534, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.01867406629025936, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.01867406629025936, "signal/brier_reward/centered_abs_mean": 0.1615391790866852, "signal/brier_reward/group_std_mean": 0.20824775099754333, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016153918392956258, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.016153918392956258, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.09066232591867447, "signal/confidence_uniqueness_reward/group_std_mean": 0.11036855280399323, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.009066233038902282, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.009066233038902282, "signal/format_reward/centered_abs_mean": 0.00074462890625, "signal/format_reward/group_std_mean": 0.0018734002485871315, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000372314453125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000372314453125, "signal/frontier_aurc_reward/centered_abs_mean": 0.003549639508128166, "signal/frontier_aurc_reward/group_std_mean": 0.005085006635636091, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.43704950157553e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.43704950157553e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.03371543250977993, "signal/frontier_ece_reward/group_std_mean": 0.05844026431441307, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0033715431578457355, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0033715431578457355, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.37131853103637696, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4504863560199738, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.01875, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03713185340166092, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03713185340166092, "step": 125 }, { "calibration/aurc": 0.36384982656677556, "calibration/batch_distribution_entropy": 0.7584043461756151, "calibration/buffer_distribution_entropy": 0.7437387048920486, "calibration/confidence_entropy": 0.2612899956313475, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.08835769324853229, "calibration/coverage@25%": 0.22243303571428572, "calibration/coverage@30%": 0.3017727189334638, "calibration/coverage@5%": 0.0, "calibration/ece": 0.20871118847848963, "calibration/mean_confidence": 0.49207635194125976, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000390625, "completions/max_length": 539.4, "completions/max_terminated_length": 539.4, "completions/mean_length": 199.337890625, "completions/mean_terminated_length": 199.41550903320314, "completions/min_length": 38.6, "completions/min_terminated_length": 92.8, "epoch": 0.416, "grad_norm": 0.0018792530754581094, "learning_rate": 1e-06, "loss": -0.0012, "num_tokens": 437583368.0, "reward": 1.1987483978271485, "reward_std": 0.18848486244678497, "rewards/accuracy_reward": 0.4486328125, "rewards/batch_coverage_0": 0.4875182926654816, "rewards/batch_coverage_1": 0.4875182926654816, "rewards/batch_coverage_10": 0.5274044513702393, "rewards/batch_coverage_15": 0.5361409902572631, "rewards/batch_coverage_20": 0.5449886083602905, "rewards/batch_coverage_25": 0.545144772529602, "rewards/batch_coverage_5": 0.5098370552062989, "rewards/brier_reward": 0.7905206561088562, "rewards/confidence_uniqueness_reward": 0.8628069043159485, "rewards/format_reward": 0.99951171875, "rewards/frontier_aurc_reward": -0.0038138974457979204, "rewards/frontier_ece_reward": 0.017152907513082027, "rewards/frontier_entropy_batch_reward": -0.5617950916290283, "signal/accuracy_reward/centered_abs_mean": 0.12532958984375, "signal/accuracy_reward/group_std_mean": 0.1621655359864235, "signal/accuracy_reward/group_zero_std_frac": 0.553125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.062664794921875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.062664794921875, "signal/advantage_abs_mean": 0.14475196599960327, "signal/advantage_pre_scale_abs_mean": 0.14475196599960327, "signal/advantage_pre_scale_std": 0.2165150135755539, "signal/advantage_std": 0.2165150135755539, "signal/batch_coverage_0/centered_abs_mean": 0.1752574473619461, "signal/batch_coverage_0/group_std_mean": 0.23077193200588225, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.017525745183229448, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.017525745183229448, "signal/batch_coverage_1/centered_abs_mean": 0.1752574473619461, "signal/batch_coverage_1/group_std_mean": 0.23077193200588225, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.017525745183229448, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.017525745183229448, "signal/batch_coverage_10/centered_abs_mean": 0.18302838802337645, "signal/batch_coverage_10/group_std_mean": 0.242452073097229, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.01830283962190151, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.01830283962190151, "signal/batch_coverage_15/centered_abs_mean": 0.1806130141019821, "signal/batch_coverage_15/group_std_mean": 0.24002839624881744, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.018061301857233047, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.018061301857233047, "signal/batch_coverage_20/centered_abs_mean": 0.18715793490409852, "signal/batch_coverage_20/group_std_mean": 0.2486134171485901, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.018715794384479522, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.018715794384479522, "signal/batch_coverage_25/centered_abs_mean": 0.18681722283363342, "signal/batch_coverage_25/group_std_mean": 0.2479231745004654, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.01868172250688076, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.01868172250688076, "signal/batch_coverage_5/centered_abs_mean": 0.17968855500221254, "signal/batch_coverage_5/group_std_mean": 0.23696467280387878, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.017968856170773505, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.017968856170773505, "signal/brier_reward/centered_abs_mean": 0.16554119884967805, "signal/brier_reward/group_std_mean": 0.21077493131160735, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01655412055552006, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01655412055552006, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.08097566366195678, "signal/confidence_uniqueness_reward/group_std_mean": 0.10046249628067017, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.008097566477954388, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008097566477954388, "signal/format_reward/centered_abs_mean": 0.000933837890625, "signal/format_reward/group_std_mean": 0.0024258273653686045, "signal/format_reward/group_zero_std_frac": 0.9875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004669189453125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0004669189453125, "signal/frontier_aurc_reward/centered_abs_mean": 0.0032779529225081205, "signal/frontier_aurc_reward/group_std_mean": 0.00480236979201436, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.0974414150696245e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.0974414150696245e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.029453156888484953, "signal/frontier_ece_reward/group_std_mean": 0.051109229773283006, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00294531574472785, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00294531574472785, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3639026403427124, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4436567842960358, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.034375, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03639026433229446, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03639026433229446, "step": 130 }, { "calibration/aurc": 0.2638975584336835, "calibration/batch_distribution_entropy": 0.7188330079276255, "calibration/buffer_distribution_entropy": 0.7579654834999048, "calibration/confidence_entropy": 0.23194149517651214, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.1719285102739726, "calibration/coverage@15%": 0.2978420070939335, "calibration/coverage@20%": 0.4002568493150685, "calibration/coverage@25%": 0.44481256115459883, "calibration/coverage@30%": 0.5624258500489236, "calibration/coverage@5%": 0.101171875, "calibration/ece": 0.15543515240158406, "calibration/mean_confidence": 0.5313769595340647, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 578.8, "completions/max_terminated_length": 578.8, "completions/mean_length": 196.31123046875, "completions/mean_terminated_length": 196.3304870605469, "completions/min_length": 77.0, "completions/min_terminated_length": 95.8, "epoch": 0.432, "grad_norm": 0.0018460171995684505, "learning_rate": 1e-06, "loss": 0.0006, "num_tokens": 454607931.0, "reward": 1.2525221824645996, "reward_std": 0.16612263917922973, "rewards/accuracy_reward": 0.49345703125, "rewards/batch_coverage_0": 0.5271356880664826, "rewards/batch_coverage_1": 0.5271356880664826, "rewards/batch_coverage_10": 0.572896933555603, "rewards/batch_coverage_15": 0.5773912191390991, "rewards/batch_coverage_20": 0.5841198205947876, "rewards/batch_coverage_25": 0.5851166129112244, "rewards/batch_coverage_5": 0.5565871596336365, "rewards/brier_reward": 0.8180199027061462, "rewards/confidence_uniqueness_reward": 0.8616907119750976, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.0027001814683899283, "rewards/frontier_ece_reward": 0.020215665549039842, "rewards/frontier_entropy_batch_reward": -0.5710589647293091, "signal/accuracy_reward/centered_abs_mean": 0.111077880859375, "signal/accuracy_reward/group_std_mean": 0.14017398059368133, "signal/accuracy_reward/group_zero_std_frac": 0.625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0555389404296875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0555389404296875, "signal/advantage_abs_mean": 0.12592501640319825, "signal/advantage_pre_scale_abs_mean": 0.12592501640319825, "signal/advantage_pre_scale_std": 0.2006388008594513, "signal/advantage_std": 0.2006388008594513, "signal/batch_coverage_0/centered_abs_mean": 0.1553127273917198, "signal/batch_coverage_0/group_std_mean": 0.20583226680755615, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.015531273372471333, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.015531273372471333, "signal/batch_coverage_1/centered_abs_mean": 0.1553127273917198, "signal/batch_coverage_1/group_std_mean": 0.20583226680755615, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.015531273372471333, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.015531273372471333, "signal/batch_coverage_10/centered_abs_mean": 0.16552983224391937, "signal/batch_coverage_10/group_std_mean": 0.2202944576740265, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.016552984528243542, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.016552984528243542, "signal/batch_coverage_15/centered_abs_mean": 0.16490549743175506, "signal/batch_coverage_15/group_std_mean": 0.21991809606552123, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.016490550339221956, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.016490550339221956, "signal/batch_coverage_20/centered_abs_mean": 0.16641467809677124, "signal/batch_coverage_20/group_std_mean": 0.22252885103225709, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.01664146836847067, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.01664146836847067, "signal/batch_coverage_25/centered_abs_mean": 0.16553274691104888, "signal/batch_coverage_25/group_std_mean": 0.22252612709999084, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.016553275845944883, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.016553275845944883, "signal/batch_coverage_5/centered_abs_mean": 0.15990071445703508, "signal/batch_coverage_5/group_std_mean": 0.21237186789512635, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.015990072302520274, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.015990072302520274, "signal/brier_reward/centered_abs_mean": 0.14135312736034394, "signal/brier_reward/group_std_mean": 0.1822331041097641, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014135313406586646, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014135313406586646, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.07747237235307694, "signal/confidence_uniqueness_reward/group_std_mean": 0.09563995599746704, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007747237477451563, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007747237477451563, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0024171784985810517, "signal/frontier_aurc_reward/group_std_mean": 0.0035217163152992726, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.0214731305022723e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.0214731305022723e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.027930035442113876, "signal/frontier_ece_reward/group_std_mean": 0.050855685770511624, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002793003572151065, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002793003572151065, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.355313766002655, "signal/frontier_entropy_batch_reward/group_std_mean": 0.43729751706123354, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.021875, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0355313777923584, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0355313777923584, "step": 135 }, { "calibration/aurc": 0.2780457111426582, "calibration/batch_distribution_entropy": 0.7576471482008242, "calibration/buffer_distribution_entropy": 0.7712803345094807, "calibration/confidence_entropy": 0.2737813753667616, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.03203125, "calibration/coverage@15%": 0.1234375, "calibration/coverage@20%": 0.24453125, "calibration/coverage@25%": 0.47265625, "calibration/coverage@30%": 0.568359375, "calibration/coverage@5%": 0.0, "calibration/ece": 0.18375648447289655, "calibration/mean_confidence": 0.5859185726642646, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 542.2, "completions/max_terminated_length": 542.2, "completions/mean_length": 199.802734375, "completions/mean_terminated_length": 199.82284851074218, "completions/min_length": 76.0, "completions/min_terminated_length": 76.4, "epoch": 0.448, "grad_norm": 0.002454441273584962, "learning_rate": 1e-06, "loss": 0.001, "num_tokens": 471606711.0, "reward": 1.2395191192626953, "reward_std": 0.18239690661430358, "rewards/accuracy_reward": 0.47587890625, "rewards/batch_coverage_0": 0.5014855623245239, "rewards/batch_coverage_1": 0.5014855623245239, "rewards/batch_coverage_10": 0.5618399024009705, "rewards/batch_coverage_15": 0.5709842443466187, "rewards/batch_coverage_20": 0.5739490628242493, "rewards/batch_coverage_25": 0.575732946395874, "rewards/batch_coverage_5": 0.5394026637077332, "rewards/brier_reward": 0.8151318907737732, "rewards/confidence_uniqueness_reward": 0.899455189704895, "rewards/format_reward": 0.999609375, "rewards/frontier_aurc_reward": -0.002985543245449662, "rewards/frontier_ece_reward": 0.0208277877420187, "rewards/frontier_entropy_batch_reward": -0.542172086238861, "signal/accuracy_reward/centered_abs_mean": 0.103057861328125, "signal/accuracy_reward/group_std_mean": 0.1395617365837097, "signal/accuracy_reward/group_zero_std_frac": 0.584375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0515289306640625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0515289306640625, "signal/advantage_abs_mean": 0.13970653414726258, "signal/advantage_pre_scale_abs_mean": 0.13970653414726258, "signal/advantage_pre_scale_std": 0.20998115837574005, "signal/advantage_std": 0.20998115837574005, "signal/batch_coverage_0/centered_abs_mean": 0.17447925209999085, "signal/batch_coverage_0/group_std_mean": 0.2283736675977707, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.017447925359010696, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.017447925359010696, "signal/batch_coverage_1/centered_abs_mean": 0.17447925209999085, "signal/batch_coverage_1/group_std_mean": 0.2283736675977707, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.017447925359010696, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.017447925359010696, "signal/batch_coverage_10/centered_abs_mean": 0.19056746661663054, "signal/batch_coverage_10/group_std_mean": 0.24970765709877013, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.01905674673616886, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.01905674673616886, "signal/batch_coverage_15/centered_abs_mean": 0.19551347494125365, "signal/batch_coverage_15/group_std_mean": 0.25578183233737944, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.01955134831368923, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.01955134831368923, "signal/batch_coverage_20/centered_abs_mean": 0.19734205305576324, "signal/batch_coverage_20/group_std_mean": 0.2576554536819458, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.019734205678105354, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.019734205678105354, "signal/batch_coverage_25/centered_abs_mean": 0.19722734689712523, "signal/batch_coverage_25/group_std_mean": 0.2575598418712616, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.01972273513674736, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.01972273513674736, "signal/batch_coverage_5/centered_abs_mean": 0.18471661508083342, "signal/batch_coverage_5/group_std_mean": 0.24224046170711516, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.01847166083753109, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.01847166083753109, "signal/brier_reward/centered_abs_mean": 0.15095800459384917, "signal/brier_reward/group_std_mean": 0.19546601474285125, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015095800533890725, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.015095800533890725, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.052432583272457124, "signal/confidence_uniqueness_reward/group_std_mean": 0.06639125794172288, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005243258271366358, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005243258271366358, "signal/format_reward/centered_abs_mean": 0.00072021484375, "signal/format_reward/group_std_mean": 0.001477878913283348, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000360107421875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000360107421875, "signal/frontier_aurc_reward/centered_abs_mean": 0.0022633123211562634, "signal/frontier_aurc_reward/group_std_mean": 0.0034012056421488523, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.8291404669289476e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.8291404669289476e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.032904643565416336, "signal/frontier_ece_reward/group_std_mean": 0.059289424866437915, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00329046449624002, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00329046449624002, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.36733065247535707, "signal/frontier_entropy_batch_reward/group_std_mean": 0.44678170084953306, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.01875, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03673306554555893, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03673306554555893, "step": 140 }, { "calibration/aurc": 0.4133911892897408, "calibration/batch_distribution_entropy": 0.7790395321052032, "calibration/buffer_distribution_entropy": 0.7791011137289166, "calibration/confidence_entropy": 0.2779780658776635, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.096875, "calibration/coverage@25%": 0.1640625, "calibration/coverage@30%": 0.331640625, "calibration/coverage@5%": 0.0, "calibration/ece": 0.1923393379600086, "calibration/mean_confidence": 0.4937273238161442, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 718.2, "completions/max_terminated_length": 718.2, "completions/mean_length": 204.9798828125, "completions/mean_terminated_length": 205.0396270751953, "completions/min_length": 32.0, "completions/min_terminated_length": 86.4, "epoch": 0.464, "grad_norm": 0.0015804837457835674, "learning_rate": 1e-06, "loss": -0.0012, "num_tokens": 488876521.0, "reward": 1.164589262008667, "reward_std": 0.1838443696498871, "rewards/accuracy_reward": 0.408984375, "rewards/batch_coverage_0": 0.4630319595336914, "rewards/batch_coverage_1": 0.4630319595336914, "rewards/batch_coverage_10": 0.5065688967704773, "rewards/batch_coverage_15": 0.5105774939060211, "rewards/batch_coverage_20": 0.5268525779247284, "rewards/batch_coverage_25": 0.5302481591701508, "rewards/batch_coverage_5": 0.48869837522506715, "rewards/brier_reward": 0.7661522626876831, "rewards/confidence_uniqueness_reward": 0.8857262253761291, "rewards/format_reward": 0.999609375, "rewards/frontier_aurc_reward": -0.004124029399827122, "rewards/frontier_ece_reward": 0.010478467494249345, "rewards/frontier_entropy_batch_reward": -0.5479272603988647, "signal/accuracy_reward/centered_abs_mean": 0.1063720703125, "signal/accuracy_reward/group_std_mean": 0.14106076657772065, "signal/accuracy_reward/group_zero_std_frac": 0.6, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05318603515625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05318603515625, "signal/advantage_abs_mean": 0.1393085926771164, "signal/advantage_pre_scale_abs_mean": 0.1393085926771164, "signal/advantage_pre_scale_std": 0.2106298953294754, "signal/advantage_std": 0.2106298953294754, "signal/batch_coverage_0/centered_abs_mean": 0.16174939572811126, "signal/batch_coverage_0/group_std_mean": 0.2152931123971939, "signal/batch_coverage_0/group_zero_std_frac": 0.003125, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.016174939833581446, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.016174939833581446, "signal/batch_coverage_1/centered_abs_mean": 0.16174939572811126, "signal/batch_coverage_1/group_std_mean": 0.2152931123971939, "signal/batch_coverage_1/group_zero_std_frac": 0.003125, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.016174939833581446, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.016174939833581446, "signal/batch_coverage_10/centered_abs_mean": 0.1717519074678421, "signal/batch_coverage_10/group_std_mean": 0.22983591854572297, "signal/batch_coverage_10/group_zero_std_frac": 0.003125, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.01717519052326679, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.01717519052326679, "signal/batch_coverage_15/centered_abs_mean": 0.17370556890964509, "signal/batch_coverage_15/group_std_mean": 0.23222627639770507, "signal/batch_coverage_15/group_zero_std_frac": 0.003125, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.01737055741250515, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.01737055741250515, "signal/batch_coverage_20/centered_abs_mean": 0.18301475644111634, "signal/batch_coverage_20/group_std_mean": 0.24411162734031677, "signal/batch_coverage_20/group_zero_std_frac": 0.003125, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.01830147597938776, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.01830147597938776, "signal/batch_coverage_25/centered_abs_mean": 0.18677313327789308, "signal/batch_coverage_25/group_std_mean": 0.2487118124961853, "signal/batch_coverage_25/group_zero_std_frac": 0.003125, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.01867731362581253, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.01867731362581253, "signal/batch_coverage_5/centered_abs_mean": 0.16763336062431336, "signal/batch_coverage_5/group_std_mean": 0.22398186922073365, "signal/batch_coverage_5/group_zero_std_frac": 0.003125, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.016763335466384886, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.016763335466384886, "signal/brier_reward/centered_abs_mean": 0.15969540774822236, "signal/brier_reward/group_std_mean": 0.20515718460083007, "signal/brier_reward/group_zero_std_frac": 0.003125, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015969540737569333, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.015969540737569333, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.058282620459795, "signal/confidence_uniqueness_reward/group_std_mean": 0.07511355727910995, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.003125, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0058282620273530485, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0058282620273530485, "signal/format_reward/centered_abs_mean": 0.0007568359375, "signal/format_reward/group_std_mean": 0.0022097086533904076, "signal/format_reward/group_zero_std_frac": 0.9875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00037841796875, "signal/frontier_aurc_reward/centered_abs_mean": 0.00264003137126565, "signal/frontier_aurc_reward/group_std_mean": 0.003911961335688829, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.300039425084833e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.300039425084833e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.035570315271615985, "signal/frontier_ece_reward/group_std_mean": 0.062593774497509, "signal/frontier_ece_reward/group_zero_std_frac": 0.003125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.003557031648233533, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.003557031648233533, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3604434788227081, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4358489096164703, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.040625, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03604434877634048, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03604434877634048, "step": 145 }, { "calibration/aurc": 0.32654655608043387, "calibration/batch_distribution_entropy": 0.7668590127145694, "calibration/buffer_distribution_entropy": 0.7802321533968591, "calibration/confidence_entropy": 0.2612017339736417, "calibration/coverage@0%": 0.033203125, "calibration/coverage@1%": 0.033203125, "calibration/coverage@10%": 0.073046875, "calibration/coverage@15%": 0.084765625, "calibration/coverage@20%": 0.266796875, "calibration/coverage@25%": 0.36484375, "calibration/coverage@30%": 0.3953125, "calibration/coverage@5%": 0.0609375, "calibration/ece": 0.19328091315566903, "calibration/mean_confidence": 0.4979834022668042, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00048828125, "completions/max_length": 675.4, "completions/max_terminated_length": 675.4, "completions/mean_length": 208.08876953125, "completions/mean_terminated_length": 208.19093627929686, "completions/min_length": 38.8, "completions/min_terminated_length": 96.2, "epoch": 0.48, "grad_norm": 0.00185799365863204, "learning_rate": 1e-06, "loss": 0.0005, "num_tokens": 506055382.0, "reward": 1.2049114227294921, "reward_std": 0.17455832958221434, "rewards/accuracy_reward": 0.46943359375, "rewards/batch_coverage_0": 0.4818290829658508, "rewards/batch_coverage_1": 0.4818290829658508, "rewards/batch_coverage_10": 0.5297632455825806, "rewards/batch_coverage_15": 0.5313080787658692, "rewards/batch_coverage_20": 0.5360439538955688, "rewards/batch_coverage_25": 0.5369215071201324, "rewards/batch_coverage_5": 0.5070482075214386, "rewards/brier_reward": 0.7813033342361451, "rewards/confidence_uniqueness_reward": 0.8709333419799805, "rewards/format_reward": 0.99931640625, "rewards/frontier_aurc_reward": -0.0030639852862805127, "rewards/frontier_ece_reward": 0.015050551109015941, "rewards/frontier_entropy_batch_reward": -0.5662830948829651, "signal/accuracy_reward/centered_abs_mean": 0.110113525390625, "signal/accuracy_reward/group_std_mean": 0.15058882236480714, "signal/accuracy_reward/group_zero_std_frac": 0.55625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0550567626953125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0550567626953125, "signal/advantage_abs_mean": 0.13103221952915192, "signal/advantage_pre_scale_abs_mean": 0.13103221952915192, "signal/advantage_pre_scale_std": 0.2030962586402893, "signal/advantage_std": 0.2030962586402893, "signal/batch_coverage_0/centered_abs_mean": 0.15617549121379853, "signal/batch_coverage_0/group_std_mean": 0.20788954198360443, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.01561754960566759, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.01561754960566759, "signal/batch_coverage_1/centered_abs_mean": 0.15617549121379853, "signal/batch_coverage_1/group_std_mean": 0.20788954198360443, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.01561754960566759, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.01561754960566759, "signal/batch_coverage_10/centered_abs_mean": 0.16319622099399567, "signal/batch_coverage_10/group_std_mean": 0.21909156441688538, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.01631962265819311, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.01631962265819311, "signal/batch_coverage_15/centered_abs_mean": 0.16425377130508423, "signal/batch_coverage_15/group_std_mean": 0.22029703259468078, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.016425377689301968, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.016425377689301968, "signal/batch_coverage_20/centered_abs_mean": 0.16655297875404357, "signal/batch_coverage_20/group_std_mean": 0.22349075376987457, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.016655297577381135, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.016655297577381135, "signal/batch_coverage_25/centered_abs_mean": 0.16626424193382264, "signal/batch_coverage_25/group_std_mean": 0.22361468076705932, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.016626424714922906, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.016626424714922906, "signal/batch_coverage_5/centered_abs_mean": 0.16179881393909454, "signal/batch_coverage_5/group_std_mean": 0.2154046893119812, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.016179881058633327, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.016179881058633327, "signal/brier_reward/centered_abs_mean": 0.15501052141189575, "signal/brier_reward/group_std_mean": 0.19950452148914338, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015501052141189575, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.015501052141189575, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.06272363662719727, "signal/confidence_uniqueness_reward/group_std_mean": 0.08009307086467743, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006272363662719727, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006272363662719727, "signal/format_reward/centered_abs_mean": 0.001300048828125, "signal/format_reward/group_std_mean": 0.0031943732406944036, "signal/format_reward/group_zero_std_frac": 0.984375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006500244140625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0006500244140625, "signal/frontier_aurc_reward/centered_abs_mean": 0.0021038135048002003, "signal/frontier_aurc_reward/group_std_mean": 0.002958751143887639, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.6297669319319538e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.6297669319319538e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.030137370526790618, "signal/frontier_ece_reward/group_std_mean": 0.0553176075220108, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.003013736940920353, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.003013736940920353, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3504864811897278, "signal/frontier_entropy_batch_reward/group_std_mean": 0.42903093695640565, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.034375, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03504864946007728, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03504864946007728, "step": 150 }, { "epoch": 0.48, "eval_calibration/aurc": 0.4884404305130252, "eval_calibration/batch_distribution_entropy": 0.7037518889147907, "eval_calibration/buffer_distribution_entropy": 0.7807073277954115, "eval_calibration/confidence_entropy": 0.24880659410045255, "eval_calibration/coverage@0%": 0.046875, "eval_calibration/coverage@1%": 0.046875, "eval_calibration/coverage@10%": 0.078125, "eval_calibration/coverage@15%": 0.078125, "eval_calibration/coverage@20%": 0.09375, "eval_calibration/coverage@25%": 0.125, "eval_calibration/coverage@30%": 0.1328125, "eval_calibration/coverage@5%": 0.046875, "eval_calibration/ece": 0.2413991424156957, "eval_calibration/mean_confidence": 0.46008607263806767, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 441.0, "eval_completions/max_terminated_length": 441.0, "eval_completions/mean_length": 210.2900733947754, "eval_completions/mean_terminated_length": 210.2900733947754, "eval_completions/min_length": 109.0, "eval_completions/min_terminated_length": 109.0, "eval_loss": 0.0, "eval_num_tokens": 506055382.0, "eval_reward": 0.9580488353967667, "eval_reward_std": 0.2786233425140381, "eval_rewards/accuracy_reward": 0.3828125, "eval_rewards/batch_coverage_0": 0.2982771582901478, "eval_rewards/batch_coverage_1": 0.2982771582901478, "eval_rewards/batch_coverage_10": 0.2973833717405796, "eval_rewards/batch_coverage_15": 0.2931292913854122, "eval_rewards/batch_coverage_20": 0.28491435572505, "eval_rewards/batch_coverage_25": 0.26486651226878166, "eval_rewards/batch_coverage_5": 0.2982771582901478, "eval_rewards/brier_reward": 0.794169008731842, "eval_rewards/confidence_uniqueness_reward": 0.8291015625, "eval_rewards/format_reward": 1.0, "eval_rewards/frontier_aurc_reward": -0.0035186284221708775, "eval_rewards/frontier_ece_reward": 0.008470115077216178, "eval_rewards/frontier_entropy_batch_reward": -1.0, "eval_runtime": 22.6013, "eval_samples_per_second": 22.123, "eval_signal/accuracy_reward/centered_abs_mean": 0.4619140625, "eval_signal/accuracy_reward/group_std_mean": 0.487694188952446, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23095703125, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23095703125, "eval_signal/advantage_abs_mean": 0.23249849677085876, "eval_signal/advantage_pre_scale_abs_mean": 0.23249849677085876, "eval_signal/advantage_pre_scale_std": 0.27607636898756027, "eval_signal/advantage_std": 0.27607636898756027, "eval_signal/batch_coverage_0/centered_abs_mean": 0.4355214685201645, "eval_signal/batch_coverage_0/group_std_mean": 0.5041546821594238, "eval_signal/batch_coverage_0/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.04355214722454548, "eval_signal/batch_coverage_0/weight": 0.10000000149011612, "eval_signal/batch_coverage_0/weighted_centered_abs_mean": 0.04355214722454548, "eval_signal/batch_coverage_1/centered_abs_mean": 0.4355214685201645, "eval_signal/batch_coverage_1/group_std_mean": 0.5041546821594238, "eval_signal/batch_coverage_1/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.04355214722454548, "eval_signal/batch_coverage_1/weight": 0.10000000149011612, "eval_signal/batch_coverage_1/weighted_centered_abs_mean": 0.04355214722454548, "eval_signal/batch_coverage_10/centered_abs_mean": 0.43344543874263763, "eval_signal/batch_coverage_10/group_std_mean": 0.5016847252845764, "eval_signal/batch_coverage_10/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.04334454517811537, "eval_signal/batch_coverage_10/weight": 0.10000000149011612, "eval_signal/batch_coverage_10/weighted_centered_abs_mean": 0.04334454517811537, "eval_signal/batch_coverage_15/centered_abs_mean": 0.42580925673246384, "eval_signal/batch_coverage_15/group_std_mean": 0.4928712695837021, "eval_signal/batch_coverage_15/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.0425809258595109, "eval_signal/batch_coverage_15/weight": 0.10000000149011612, "eval_signal/batch_coverage_15/weighted_centered_abs_mean": 0.0425809258595109, "eval_signal/batch_coverage_20/centered_abs_mean": 0.4136809632182121, "eval_signal/batch_coverage_20/group_std_mean": 0.4787442535161972, "eval_signal/batch_coverage_20/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.04136809799820185, "eval_signal/batch_coverage_20/weight": 0.10000000149011612, "eval_signal/batch_coverage_20/weighted_centered_abs_mean": 0.04136809799820185, "eval_signal/batch_coverage_25/centered_abs_mean": 0.3728942573070526, "eval_signal/batch_coverage_25/group_std_mean": 0.43325306475162506, "eval_signal/batch_coverage_25/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.03728942573070526, "eval_signal/batch_coverage_25/weight": 0.10000000149011612, "eval_signal/batch_coverage_25/weighted_centered_abs_mean": 0.03728942573070526, "eval_signal/batch_coverage_5/centered_abs_mean": 0.4355214685201645, "eval_signal/batch_coverage_5/group_std_mean": 0.5041546821594238, "eval_signal/batch_coverage_5/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.04355214722454548, "eval_signal/batch_coverage_5/weight": 0.10000000149011612, "eval_signal/batch_coverage_5/weighted_centered_abs_mean": 0.04355214722454548, "eval_signal/brier_reward/centered_abs_mean": 0.27639148384332657, "eval_signal/brier_reward/group_std_mean": 0.34276602417230606, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.027639148756861687, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.027639148756861687, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0998077392578125, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.12010791897773743, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.009980774368159473, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.009980774368159473, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.00420642871176824, "eval_signal/frontier_aurc_reward/group_std_mean": 0.007171055534854531, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.25803607160924e-05, "eval_signal/frontier_aurc_reward/weight": 0.012500000186264515, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.25803607160924e-05, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.03641515225172043, "eval_signal/frontier_ece_reward/group_std_mean": 0.05970622505992651, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0036415152717381716, "eval_signal/frontier_ece_reward/weight": 0.10000000149011612, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0036415152717381716, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0, "eval_steps_per_second": 0.177, "step": 150 }, { "calibration/aurc": 0.38867555529956505, "calibration/batch_distribution_entropy": 0.744077371845278, "calibration/buffer_distribution_entropy": 0.7789951146287379, "calibration/confidence_entropy": 0.2551266385058731, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.16171875, "calibration/coverage@15%": 0.177734375, "calibration/coverage@20%": 0.2109375, "calibration/coverage@25%": 0.240625, "calibration/coverage@30%": 0.301171875, "calibration/coverage@5%": 0.1078125, "calibration/ece": 0.18511672998881415, "calibration/mean_confidence": 0.5120126299693563, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 588.2, "completions/max_terminated_length": 588.2, "completions/mean_length": 208.57919921875, "completions/mean_terminated_length": 208.62011108398437, "completions/min_length": 75.6, "completions/min_terminated_length": 96.0, "epoch": 0.496, "grad_norm": 0.0018345050048083067, "learning_rate": 1e-06, "loss": 0.0009, "num_tokens": 523499073.0, "reward": 1.2330967903137207, "reward_std": 0.1739658534526825, "rewards/accuracy_reward": 0.5115234375, "rewards/batch_coverage_0": 0.4806299567222595, "rewards/batch_coverage_1": 0.4806299567222595, "rewards/batch_coverage_10": 0.5262403607368469, "rewards/batch_coverage_15": 0.5340194821357727, "rewards/batch_coverage_20": 0.548810887336731, "rewards/batch_coverage_25": 0.5515802383422852, "rewards/batch_coverage_5": 0.5057286143302917, "rewards/brier_reward": 0.7810137271881104, "rewards/confidence_uniqueness_reward": 0.8951348662376404, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.0029973339987918735, "rewards/frontier_ece_reward": 0.01581182572990656, "rewards/frontier_entropy_batch_reward": -0.5444094181060791, "signal/accuracy_reward/centered_abs_mean": 0.10760498046875, "signal/accuracy_reward/group_std_mean": 0.13941818177700044, "signal/accuracy_reward/group_zero_std_frac": 0.61875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.053802490234375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.053802490234375, "signal/advantage_abs_mean": 0.13376112133264542, "signal/advantage_pre_scale_abs_mean": 0.13376112133264542, "signal/advantage_pre_scale_std": 0.20336721539497377, "signal/advantage_std": 0.20336721539497377, "signal/batch_coverage_0/centered_abs_mean": 0.16944837868213652, "signal/batch_coverage_0/group_std_mean": 0.2214631974697113, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.016944839060306548, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.016944839060306548, "signal/batch_coverage_1/centered_abs_mean": 0.16944837868213652, "signal/batch_coverage_1/group_std_mean": 0.2214631974697113, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.016944839060306548, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.016944839060306548, "signal/batch_coverage_10/centered_abs_mean": 0.17917440831661224, "signal/batch_coverage_10/group_std_mean": 0.23413580656051636, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.017917441949248315, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.017917441949248315, "signal/batch_coverage_15/centered_abs_mean": 0.17658520340919495, "signal/batch_coverage_15/group_std_mean": 0.23149906396865844, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.017658520489931107, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.017658520489931107, "signal/batch_coverage_20/centered_abs_mean": 0.1827937513589859, "signal/batch_coverage_20/group_std_mean": 0.24057833552360536, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.01827937588095665, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.01827937588095665, "signal/batch_coverage_25/centered_abs_mean": 0.1835138112306595, "signal/batch_coverage_25/group_std_mean": 0.24196174442768098, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.01835138164460659, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.01835138164460659, "signal/batch_coverage_5/centered_abs_mean": 0.1757124364376068, "signal/batch_coverage_5/group_std_mean": 0.2290509968996048, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.017571244016289712, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.017571244016289712, "signal/brier_reward/centered_abs_mean": 0.1527266710996628, "signal/brier_reward/group_std_mean": 0.1937905639410019, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015272667445242406, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.015272667445242406, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.053154267370700836, "signal/confidence_uniqueness_reward/group_std_mean": 0.06698481962084771, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005315426737070084, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005315426737070084, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_std_mean": 0.0016572814434766769, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.002370417304337025, "signal/frontier_aurc_reward/group_std_mean": 0.003476083744317293, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.963021761388518e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.963021761388518e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.025968721136450768, "signal/frontier_ece_reward/group_std_mean": 0.04501113593578339, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0025968722999095916, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0025968722999095916, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3555388033390045, "signal/frontier_entropy_batch_reward/group_std_mean": 0.43377270102500914, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.015625, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.035553880780935285, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.035553880780935285, "step": 155 }, { "calibration/aurc": 0.3188977987264959, "calibration/batch_distribution_entropy": 0.7773323065899319, "calibration/buffer_distribution_entropy": 0.7764884772212833, "calibration/confidence_entropy": 0.2696159268208872, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.186328125, "calibration/coverage@15%": 0.290234375, "calibration/coverage@20%": 0.344140625, "calibration/coverage@25%": 0.402734375, "calibration/coverage@30%": 0.44765625, "calibration/coverage@5%": 0.0, "calibration/ece": 0.17410244165023042, "calibration/mean_confidence": 0.5204375945705545, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 648.2, "completions/max_terminated_length": 648.2, "completions/mean_length": 203.21279296875, "completions/mean_terminated_length": 203.2727264404297, "completions/min_length": 36.8, "completions/min_terminated_length": 95.4, "epoch": 0.512, "grad_norm": 0.001649328856728971, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 540725636.0, "reward": 1.2527109146118165, "reward_std": 0.175427907705307, "rewards/accuracy_reward": 0.51953125, "rewards/batch_coverage_0": 0.49197604656219485, "rewards/batch_coverage_1": 0.49197604656219485, "rewards/batch_coverage_10": 0.5528481423854827, "rewards/batch_coverage_15": 0.5631466746330261, "rewards/batch_coverage_20": 0.5689069867134094, "rewards/batch_coverage_25": 0.5744830250740052, "rewards/batch_coverage_5": 0.5323562324047089, "rewards/brier_reward": 0.8082866907119751, "rewards/confidence_uniqueness_reward": 0.8861725449562072, "rewards/format_reward": 0.99951171875, "rewards/frontier_aurc_reward": -0.002741323783993721, "rewards/frontier_ece_reward": 0.01900950875133276, "rewards/frontier_entropy_batch_reward": -0.556925094127655, "signal/accuracy_reward/centered_abs_mean": 0.11180419921875, "signal/accuracy_reward/group_std_mean": 0.14193963408470153, "signal/accuracy_reward/group_zero_std_frac": 0.6125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.055902099609375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.055902099609375, "signal/advantage_abs_mean": 0.13240396827459336, "signal/advantage_pre_scale_abs_mean": 0.13240396827459336, "signal/advantage_pre_scale_std": 0.20585049986839293, "signal/advantage_std": 0.20585049986839293, "signal/batch_coverage_0/centered_abs_mean": 0.15534441769123078, "signal/batch_coverage_0/group_std_mean": 0.20446575582027435, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.01553444191813469, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.01553444191813469, "signal/batch_coverage_1/centered_abs_mean": 0.15534441769123078, "signal/batch_coverage_1/group_std_mean": 0.20446575582027435, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.01553444191813469, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.01553444191813469, "signal/batch_coverage_10/centered_abs_mean": 0.16945621967315674, "signal/batch_coverage_10/group_std_mean": 0.22647275328636168, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.016945621743798257, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.016945621743798257, "signal/batch_coverage_15/centered_abs_mean": 0.1726034849882126, "signal/batch_coverage_15/group_std_mean": 0.23102477192878723, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.017260348610579967, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.017260348610579967, "signal/batch_coverage_20/centered_abs_mean": 0.17522413432598113, "signal/batch_coverage_20/group_std_mean": 0.234201380610466, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.017522412911057474, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.017522412911057474, "signal/batch_coverage_25/centered_abs_mean": 0.1778273493051529, "signal/batch_coverage_25/group_std_mean": 0.23738239705562592, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.0177827350795269, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.0177827350795269, "signal/batch_coverage_5/centered_abs_mean": 0.1642172545194626, "signal/batch_coverage_5/group_std_mean": 0.21815676391124725, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.016421726159751415, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.016421726159751415, "signal/brier_reward/centered_abs_mean": 0.14843180775642395, "signal/brier_reward/group_std_mean": 0.19130195081233978, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01484318058937788, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01484318058937788, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.06038021594285965, "signal/confidence_uniqueness_reward/group_std_mean": 0.07598417848348618, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006038021761924028, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006038021761924028, "signal/format_reward/centered_abs_mean": 0.000946044921875, "signal/format_reward/group_std_mean": 0.0027621358167380095, "signal/format_reward/group_zero_std_frac": 0.984375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0025649062590673566, "signal/frontier_aurc_reward/group_std_mean": 0.0038140499033033847, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.206132823834196e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.206132823834196e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.023592386022210123, "signal/frontier_ece_reward/group_std_mean": 0.03882751725614071, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0023592386161908506, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0023592386161908506, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.36873559951782225, "signal/frontier_entropy_batch_reward/group_std_mean": 0.44527164101600647, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.015625, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.036873559653759005, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.036873559653759005, "step": 160 }, { "calibration/aurc": 0.22378652873622512, "calibration/batch_distribution_entropy": 0.7426672349348944, "calibration/buffer_distribution_entropy": 0.7743966444436385, "calibration/confidence_entropy": 0.23855738268617682, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.30232383578431377, "calibration/coverage@15%": 0.38014705882352945, "calibration/coverage@20%": 0.466141237745098, "calibration/coverage@25%": 0.611890318627451, "calibration/coverage@30%": 0.698265931372549, "calibration/coverage@5%": 0.20137101715686273, "calibration/ece": 0.1219204127819263, "calibration/mean_confidence": 0.550545181805313, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00068359375, "completions/max_length": 593.4, "completions/max_terminated_length": 593.4, "completions/mean_length": 197.36845703125, "completions/mean_terminated_length": 197.5027648925781, "completions/min_length": 39.0, "completions/min_terminated_length": 92.4, "epoch": 0.528, "grad_norm": 0.0024422414135187864, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 557776225.0, "reward": 1.2554330110549927, "reward_std": 0.16490131318569184, "rewards/accuracy_reward": 0.5, "rewards/batch_coverage_0": 0.5215178728103638, "rewards/batch_coverage_1": 0.5215178728103638, "rewards/batch_coverage_10": 0.5667154312133789, "rewards/batch_coverage_15": 0.5763909935951232, "rewards/batch_coverage_20": 0.5892943024635315, "rewards/batch_coverage_25": 0.5948551058769226, "rewards/batch_coverage_5": 0.544804036617279, "rewards/brier_reward": 0.8188990354537964, "rewards/confidence_uniqueness_reward": 0.8539436340332032, "rewards/format_reward": 0.99931640625, "rewards/frontier_aurc_reward": -0.002834248635917902, "rewards/frontier_ece_reward": 0.016052440367639066, "rewards/frontier_entropy_batch_reward": -0.5458881616592407, "signal/accuracy_reward/centered_abs_mean": 0.11424560546875, "signal/accuracy_reward/group_std_mean": 0.14434780478477477, "signal/accuracy_reward/group_zero_std_frac": 0.6125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.057122802734375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.057122802734375, "signal/advantage_abs_mean": 0.12325243949890137, "signal/advantage_pre_scale_abs_mean": 0.12325243949890137, "signal/advantage_pre_scale_std": 0.19943318963050843, "signal/advantage_std": 0.19943318963050843, "signal/batch_coverage_0/centered_abs_mean": 0.15505726933479308, "signal/batch_coverage_0/group_std_mean": 0.20619003772735595, "signal/batch_coverage_0/group_zero_std_frac": 0.009375, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.015505727007985115, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.015505727007985115, "signal/batch_coverage_1/centered_abs_mean": 0.15505726933479308, "signal/batch_coverage_1/group_std_mean": 0.20619003772735595, "signal/batch_coverage_1/group_zero_std_frac": 0.009375, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.015505727007985115, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.015505727007985115, "signal/batch_coverage_10/centered_abs_mean": 0.1644124746322632, "signal/batch_coverage_10/group_std_mean": 0.21964193284511566, "signal/batch_coverage_10/group_zero_std_frac": 0.009375, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.016441247425973415, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.016441247425973415, "signal/batch_coverage_15/centered_abs_mean": 0.1649382770061493, "signal/batch_coverage_15/group_std_mean": 0.22163851857185363, "signal/batch_coverage_15/group_zero_std_frac": 0.009375, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.016493828408420085, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.016493828408420085, "signal/batch_coverage_20/centered_abs_mean": 0.1724429965019226, "signal/batch_coverage_20/group_std_mean": 0.23197215795516968, "signal/batch_coverage_20/group_zero_std_frac": 0.009375, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.017244300059974193, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.017244300059974193, "signal/batch_coverage_25/centered_abs_mean": 0.17294214367866517, "signal/batch_coverage_25/group_std_mean": 0.23340463042259216, "signal/batch_coverage_25/group_zero_std_frac": 0.009375, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.01729421429336071, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.01729421429336071, "signal/batch_coverage_5/centered_abs_mean": 0.1575198918581009, "signal/batch_coverage_5/group_std_mean": 0.20969080328941345, "signal/batch_coverage_5/group_zero_std_frac": 0.009375, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.01575198918581009, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.01575198918581009, "signal/brier_reward/centered_abs_mean": 0.13578990399837493, "signal/brier_reward/group_std_mean": 0.1776987671852112, "signal/brier_reward/group_zero_std_frac": 0.009375, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01357899084687233, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01357899084687233, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.07442506104707718, "signal/confidence_uniqueness_reward/group_std_mean": 0.09471029192209243, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.009375, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007442506123334169, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007442506123334169, "signal/format_reward/centered_abs_mean": 0.001251220703125, "signal/format_reward/group_std_mean": 0.0027073150966316463, "signal/format_reward/group_zero_std_frac": 0.9875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006256103515625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0006256103515625, "signal/frontier_aurc_reward/centered_abs_mean": 0.002246782067231834, "signal/frontier_aurc_reward/group_std_mean": 0.0032120409421622755, "signal/frontier_aurc_reward/group_zero_std_frac": 0.003125, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.8084776931791567e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.8084776931791567e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.016834354028105736, "signal/frontier_ece_reward/group_std_mean": 0.02396107092499733, "signal/frontier_ece_reward/group_zero_std_frac": 0.009375, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0016834354726597666, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0016834354726597666, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.34070093631744386, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4177111804485321, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.053125, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.034070093929767606, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.034070093929767606, "step": 165 }, { "calibration/aurc": 0.23778958238290318, "calibration/batch_distribution_entropy": 0.6721844309871134, "calibration/buffer_distribution_entropy": 0.7698408958916128, "calibration/confidence_entropy": 0.20618978053254292, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.18515625, "calibration/coverage@15%": 0.325390625, "calibration/coverage@20%": 0.545703125, "calibration/coverage@25%": 0.658984375, "calibration/coverage@30%": 0.73359375, "calibration/coverage@5%": 0.0, "calibration/ece": 0.12480381607492423, "calibration/mean_confidence": 0.5956984779267076, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 609.0, "completions/max_terminated_length": 609.0, "completions/mean_length": 192.44296875, "completions/mean_terminated_length": 192.479736328125, "completions/min_length": 54.0, "completions/min_terminated_length": 92.8, "epoch": 0.544, "grad_norm": 0.002244866918772459, "learning_rate": 1e-06, "loss": 0.0001, "num_tokens": 574910425.0, "reward": 1.2379089832305907, "reward_std": 0.17852184176445007, "rewards/accuracy_reward": 0.5314453125, "rewards/batch_coverage_0": 0.45688796043395996, "rewards/batch_coverage_1": 0.45688796043395996, "rewards/batch_coverage_10": 0.5334352254867554, "rewards/batch_coverage_15": 0.5431413769721984, "rewards/batch_coverage_20": 0.5495520114898682, "rewards/batch_coverage_25": 0.5513846039772033, "rewards/batch_coverage_5": 0.49837467074394226, "rewards/brier_reward": 0.7937116980552673, "rewards/confidence_uniqueness_reward": 0.8674801826477051, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.0031372241675853728, "rewards/frontier_ece_reward": 0.015439392626285553, "rewards/frontier_entropy_batch_reward": -0.5430628538131714, "signal/accuracy_reward/centered_abs_mean": 0.11666259765625, "signal/accuracy_reward/group_std_mean": 0.15614081025123597, "signal/accuracy_reward/group_zero_std_frac": 0.546875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.058331298828125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.058331298828125, "signal/advantage_abs_mean": 0.1334821343421936, "signal/advantage_pre_scale_abs_mean": 0.1334821343421936, "signal/advantage_pre_scale_std": 0.2087447464466095, "signal/advantage_std": 0.2087447464466095, "signal/batch_coverage_0/centered_abs_mean": 0.15778434574604033, "signal/batch_coverage_0/group_std_mean": 0.20611576437950135, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.01577843427658081, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.01577843427658081, "signal/batch_coverage_1/centered_abs_mean": 0.15778434574604033, "signal/batch_coverage_1/group_std_mean": 0.20611576437950135, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.01577843427658081, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.01577843427658081, "signal/batch_coverage_10/centered_abs_mean": 0.1730392426252365, "signal/batch_coverage_10/group_std_mean": 0.2319386124610901, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.017303923889994622, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.017303923889994622, "signal/batch_coverage_15/centered_abs_mean": 0.17587968111038207, "signal/batch_coverage_15/group_std_mean": 0.236191463470459, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.01758796814829111, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.01758796814829111, "signal/batch_coverage_20/centered_abs_mean": 0.1733997642993927, "signal/batch_coverage_20/group_std_mean": 0.23472839891910552, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.017339977063238622, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.017339977063238622, "signal/batch_coverage_25/centered_abs_mean": 0.1751231223344803, "signal/batch_coverage_25/group_std_mean": 0.2371717870235443, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.01751231253147125, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.01751231253147125, "signal/batch_coverage_5/centered_abs_mean": 0.167154985666275, "signal/batch_coverage_5/group_std_mean": 0.22095239162445068, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.016715498454868794, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.016715498454868794, "signal/brier_reward/centered_abs_mean": 0.14963855743408203, "signal/brier_reward/group_std_mean": 0.1929429590702057, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014963855780661107, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014963855780661107, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.07363773882389069, "signal/confidence_uniqueness_reward/group_std_mean": 0.09168784022331238, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007363773882389069, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007363773882389069, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0027165337465703487, "signal/frontier_aurc_reward/group_std_mean": 0.003975105192512274, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.395667154109106e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.395667154109106e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.01730995737016201, "signal/frontier_ece_reward/group_std_mean": 0.02390244007110596, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0017309957882389426, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0017309957882389426, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.34708802700042723, "signal/frontier_entropy_batch_reward/group_std_mean": 0.42616881132125856, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.025, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03470880389213562, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03470880389213562, "step": 170 }, { "calibration/aurc": 0.2520918647908457, "calibration/batch_distribution_entropy": 0.7586509591853903, "calibration/buffer_distribution_entropy": 0.7642095306255758, "calibration/confidence_entropy": 0.2576317244833691, "calibration/coverage@0%": 0.098046875, "calibration/coverage@1%": 0.131640625, "calibration/coverage@10%": 0.279296875, "calibration/coverage@15%": 0.339453125, "calibration/coverage@20%": 0.428125, "calibration/coverage@25%": 0.510546875, "calibration/coverage@30%": 0.665234375, "calibration/coverage@5%": 0.171875, "calibration/ece": 0.11032255719491908, "calibration/mean_confidence": 0.5397637404746065, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 667.0, "completions/max_terminated_length": 667.0, "completions/mean_length": 191.084765625, "completions/mean_terminated_length": 191.14002685546876, "completions/min_length": 37.4, "completions/min_terminated_length": 89.4, "epoch": 0.56, "grad_norm": 0.001731444033794105, "learning_rate": 1e-06, "loss": -0.0, "num_tokens": 591688541.0, "reward": 1.2657613039016724, "reward_std": 0.15425851047039033, "rewards/accuracy_reward": 0.49580078125, "rewards/batch_coverage_0": 0.5337414622306824, "rewards/batch_coverage_1": 0.5337414622306824, "rewards/batch_coverage_10": 0.5779079556465149, "rewards/batch_coverage_15": 0.5824267387390136, "rewards/batch_coverage_20": 0.5927862644195556, "rewards/batch_coverage_25": 0.5977591753005982, "rewards/batch_coverage_5": 0.5679870009422302, "rewards/brier_reward": 0.8221750140190125, "rewards/confidence_uniqueness_reward": 0.8881647944450378, "rewards/format_reward": 0.999609375, "rewards/frontier_aurc_reward": -0.002653585933148861, "rewards/frontier_ece_reward": 0.0161499110981822, "rewards/frontier_entropy_batch_reward": -0.5319459795951843, "signal/accuracy_reward/centered_abs_mean": 0.085516357421875, "signal/accuracy_reward/group_std_mean": 0.11478217989206314, "signal/accuracy_reward/group_zero_std_frac": 0.6625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0427581787109375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0427581787109375, "signal/advantage_abs_mean": 0.11694410741329193, "signal/advantage_pre_scale_abs_mean": 0.11694410741329193, "signal/advantage_pre_scale_std": 0.18468815684318543, "signal/advantage_std": 0.18468815684318543, "signal/batch_coverage_0/centered_abs_mean": 0.14939452409744264, "signal/batch_coverage_0/group_std_mean": 0.19393481314182281, "signal/batch_coverage_0/group_zero_std_frac": 0.003125, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.014939452335238457, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.014939452335238457, "signal/batch_coverage_1/centered_abs_mean": 0.14939452409744264, "signal/batch_coverage_1/group_std_mean": 0.19393481314182281, "signal/batch_coverage_1/group_zero_std_frac": 0.003125, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.014939452335238457, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.014939452335238457, "signal/batch_coverage_10/centered_abs_mean": 0.1572708457708359, "signal/batch_coverage_10/group_std_mean": 0.20482723116874696, "signal/batch_coverage_10/group_zero_std_frac": 0.003125, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.015727085433900355, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.015727085433900355, "signal/batch_coverage_15/centered_abs_mean": 0.15779331028461457, "signal/batch_coverage_15/group_std_mean": 0.2049179792404175, "signal/batch_coverage_15/group_zero_std_frac": 0.003125, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.015779331885278226, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.015779331885278226, "signal/batch_coverage_20/centered_abs_mean": 0.160746768116951, "signal/batch_coverage_20/group_std_mean": 0.20988258421421052, "signal/batch_coverage_20/group_zero_std_frac": 0.003125, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.016074676625430585, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.016074676625430585, "signal/batch_coverage_25/centered_abs_mean": 0.16449660658836365, "signal/batch_coverage_25/group_std_mean": 0.2150314450263977, "signal/batch_coverage_25/group_zero_std_frac": 0.003125, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.016449661180377006, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.016449661180377006, "signal/batch_coverage_5/centered_abs_mean": 0.15509903728961943, "signal/batch_coverage_5/group_std_mean": 0.2023308277130127, "signal/batch_coverage_5/group_zero_std_frac": 0.003125, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.015509903617203235, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.015509903617203235, "signal/brier_reward/centered_abs_mean": 0.12762503176927567, "signal/brier_reward/group_std_mean": 0.16494102776050568, "signal/brier_reward/group_zero_std_frac": 0.003125, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012762503698468208, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012762503698468208, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.05460420995950699, "signal/confidence_uniqueness_reward/group_std_mean": 0.0708104282617569, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.003125, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005460420995950699, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005460420995950699, "signal/format_reward/centered_abs_mean": 0.0007568359375, "signal/format_reward/group_std_mean": 0.0022097086533904076, "signal/format_reward/group_zero_std_frac": 0.9875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00037841796875, "signal/frontier_aurc_reward/centered_abs_mean": 0.001886643934994936, "signal/frontier_aurc_reward/group_std_mean": 0.002751070214435458, "signal/frontier_aurc_reward/group_zero_std_frac": 0.003125, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.358304955123458e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.358304955123458e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.016662517562508583, "signal/frontier_ece_reward/group_std_mean": 0.023172761127352715, "signal/frontier_ece_reward/group_zero_std_frac": 0.00625, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0016662518493831158, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0016662518493831158, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3374159514904022, "signal/frontier_entropy_batch_reward/group_std_mean": 0.41330012679100037, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.05, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03374159559607506, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03374159559607506, "step": 175 }, { "calibration/aurc": 0.27164267924301744, "calibration/batch_distribution_entropy": 0.7208326409001041, "calibration/buffer_distribution_entropy": 0.7632079280659739, "calibration/confidence_entropy": 0.23629183211104654, "calibration/coverage@0%": 0.042578125, "calibration/coverage@1%": 0.042578125, "calibration/coverage@10%": 0.171484375, "calibration/coverage@15%": 0.3296875, "calibration/coverage@20%": 0.406640625, "calibration/coverage@25%": 0.487890625, "calibration/coverage@30%": 0.565234375, "calibration/coverage@5%": 0.085546875, "calibration/ece": 0.14933425701114764, "calibration/mean_confidence": 0.5480320473725643, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 477.2, "completions/max_terminated_length": 477.2, "completions/mean_length": 192.9146484375, "completions/mean_terminated_length": 192.97347412109374, "completions/min_length": 56.4, "completions/min_terminated_length": 94.4, "epoch": 0.576, "grad_norm": 0.0015635039890184999, "learning_rate": 1e-06, "loss": -0.0008, "num_tokens": 608850611.0, "reward": 1.2522411346435547, "reward_std": 0.15446202158927919, "rewards/accuracy_reward": 0.4966796875, "rewards/batch_coverage_0": 0.525709193944931, "rewards/batch_coverage_1": 0.525709193944931, "rewards/batch_coverage_10": 0.5797426462173462, "rewards/batch_coverage_15": 0.5850045204162597, "rewards/batch_coverage_20": 0.5929110765457153, "rewards/batch_coverage_25": 0.5955699563026429, "rewards/batch_coverage_5": 0.5522035241127015, "rewards/brier_reward": 0.7857939243316651, "rewards/confidence_uniqueness_reward": 0.8544246196746826, "rewards/format_reward": 0.999609375, "rewards/frontier_aurc_reward": -0.003201226470991969, "rewards/frontier_ece_reward": 0.013790114596486092, "rewards/frontier_entropy_batch_reward": -0.5694931745529175, "signal/accuracy_reward/centered_abs_mean": 0.07906494140625, "signal/accuracy_reward/group_std_mean": 0.10888843834400178, "signal/accuracy_reward/group_zero_std_frac": 0.66875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.039532470703125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.039532470703125, "signal/advantage_abs_mean": 0.11520133465528488, "signal/advantage_pre_scale_abs_mean": 0.11520133465528488, "signal/advantage_pre_scale_std": 0.19038280248641967, "signal/advantage_std": 0.19038280248641967, "signal/batch_coverage_0/centered_abs_mean": 0.1342694342136383, "signal/batch_coverage_0/group_std_mean": 0.1771334409713745, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.013426943868398666, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.013426943868398666, "signal/batch_coverage_1/centered_abs_mean": 0.1342694342136383, "signal/batch_coverage_1/group_std_mean": 0.1771334409713745, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.013426943868398666, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.013426943868398666, "signal/batch_coverage_10/centered_abs_mean": 0.14773536026477813, "signal/batch_coverage_10/group_std_mean": 0.19756182432174682, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.014773536287248135, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.014773536287248135, "signal/batch_coverage_15/centered_abs_mean": 0.15087927877902985, "signal/batch_coverage_15/group_std_mean": 0.20164599716663362, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.015087928250432014, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.015087928250432014, "signal/batch_coverage_20/centered_abs_mean": 0.1539299249649048, "signal/batch_coverage_20/group_std_mean": 0.20657563507556914, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.01539299227297306, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.01539299227297306, "signal/batch_coverage_25/centered_abs_mean": 0.15464832186698912, "signal/batch_coverage_25/group_std_mean": 0.20785588324069976, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.015464832447469235, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.015464832447469235, "signal/batch_coverage_5/centered_abs_mean": 0.13860245048999786, "signal/batch_coverage_5/group_std_mean": 0.18345766067504882, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.013860245980322362, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.013860245980322362, "signal/brier_reward/centered_abs_mean": 0.1274334356188774, "signal/brier_reward/group_std_mean": 0.16567680537700652, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012743343599140644, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012743343599140644, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.07045024782419204, "signal/confidence_uniqueness_reward/group_std_mean": 0.09113750010728836, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0070450249128043655, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0070450249128043655, "signal/format_reward/centered_abs_mean": 0.00074462890625, "signal/format_reward/group_std_mean": 0.0018734002020210027, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000372314453125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000372314453125, "signal/frontier_aurc_reward/centered_abs_mean": 0.0021882928907871247, "signal/frontier_aurc_reward/group_std_mean": 0.0032797419466078282, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.7353662517271005e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.7353662517271005e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.015608221106231213, "signal/frontier_ece_reward/group_std_mean": 0.022461001202464102, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0015608221292495728, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0015608221292495728, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32071712613105774, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3963967502117157, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.06875, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03207171261310578, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03207171261310578, "step": 180 }, { "calibration/aurc": 0.2978480243770719, "calibration/batch_distribution_entropy": 0.6714511049146308, "calibration/buffer_distribution_entropy": 0.7618800137320163, "calibration/confidence_entropy": 0.20542300175159228, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.2408054060665362, "calibration/coverage@15%": 0.3275654354207436, "calibration/coverage@20%": 0.4197965080724071, "calibration/coverage@25%": 0.5444311093444227, "calibration/coverage@30%": 0.5936727923189824, "calibration/coverage@5%": 0.187620780332681, "calibration/ece": 0.16323717985124106, "calibration/mean_confidence": 0.5272898830970567, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 527.4, "completions/max_terminated_length": 527.4, "completions/mean_length": 193.88447265625, "completions/mean_terminated_length": 193.92267150878905, "completions/min_length": 57.0, "completions/min_terminated_length": 92.6, "epoch": 0.592, "grad_norm": 0.0016659012762829661, "learning_rate": 1e-06, "loss": 0.0011, "num_tokens": 626003700.0, "reward": 1.2439999103546142, "reward_std": 0.15648065507411957, "rewards/accuracy_reward": 0.48837890625, "rewards/batch_coverage_0": 0.5292935132980346, "rewards/batch_coverage_1": 0.5292935132980346, "rewards/batch_coverage_10": 0.5772804141044616, "rewards/batch_coverage_15": 0.5800390005111694, "rewards/batch_coverage_20": 0.5900392889976501, "rewards/batch_coverage_25": 0.5927664875984192, "rewards/batch_coverage_5": 0.5604206562042237, "rewards/brier_reward": 0.7853564500808716, "rewards/confidence_uniqueness_reward": 0.833574116230011, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.003251887438818812, "rewards/frontier_ece_reward": 0.013345247507095337, "rewards/frontier_entropy_batch_reward": -0.5919211268424988, "signal/accuracy_reward/centered_abs_mean": 0.088482666015625, "signal/accuracy_reward/group_std_mean": 0.12249124199151992, "signal/accuracy_reward/group_zero_std_frac": 0.628125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0442413330078125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0442413330078125, "signal/advantage_abs_mean": 0.11981900781393051, "signal/advantage_pre_scale_abs_mean": 0.11981900781393051, "signal/advantage_pre_scale_std": 0.1967749923467636, "signal/advantage_std": 0.1967749923467636, "signal/batch_coverage_0/centered_abs_mean": 0.14945873618125916, "signal/batch_coverage_0/group_std_mean": 0.19603240489959717, "signal/batch_coverage_0/group_zero_std_frac": 0.009375, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.014945873618125915, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.014945873618125915, "signal/batch_coverage_1/centered_abs_mean": 0.14945873618125916, "signal/batch_coverage_1/group_std_mean": 0.19603240489959717, "signal/batch_coverage_1/group_zero_std_frac": 0.009375, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.014945873618125915, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.014945873618125915, "signal/batch_coverage_10/centered_abs_mean": 0.15743134617805482, "signal/batch_coverage_10/group_std_mean": 0.20761019885540008, "signal/batch_coverage_10/group_zero_std_frac": 0.009375, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.015743134170770647, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.015743134170770647, "signal/batch_coverage_15/centered_abs_mean": 0.1575777143239975, "signal/batch_coverage_15/group_std_mean": 0.20775634944438934, "signal/batch_coverage_15/group_zero_std_frac": 0.009375, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.01575777158141136, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.01575777158141136, "signal/batch_coverage_20/centered_abs_mean": 0.1627923756837845, "signal/batch_coverage_20/group_std_mean": 0.21458121240139008, "signal/batch_coverage_20/group_zero_std_frac": 0.009375, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.016279237903654577, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.016279237903654577, "signal/batch_coverage_25/centered_abs_mean": 0.16482252776622772, "signal/batch_coverage_25/group_std_mean": 0.21744664311408995, "signal/batch_coverage_25/group_zero_std_frac": 0.009375, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.016482253558933735, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.016482253558933735, "signal/batch_coverage_5/centered_abs_mean": 0.15643575489521028, "signal/batch_coverage_5/group_std_mean": 0.20531278550624849, "signal/batch_coverage_5/group_zero_std_frac": 0.009375, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.015643575973808766, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.015643575973808766, "signal/brier_reward/centered_abs_mean": 0.13729436993598937, "signal/brier_reward/group_std_mean": 0.17434926629066466, "signal/brier_reward/group_zero_std_frac": 0.009375, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013729437068104745, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013729437068104745, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.08399370908737183, "signal/confidence_uniqueness_reward/group_std_mean": 0.10645784288644791, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.009375, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00839937087148428, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00839937087148428, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.002287204097956419, "signal/frontier_aurc_reward/group_std_mean": 0.003136986354365945, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.8590050715138204e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.8590050715138204e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.015637117996811868, "signal/frontier_ece_reward/group_std_mean": 0.022227041050791742, "signal/frontier_ece_reward/group_zero_std_frac": 0.0125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0015637118136510252, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0015637118136510252, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3143742084503174, "signal/frontier_entropy_batch_reward/group_std_mean": 0.39200871586799624, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.06875, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03143742233514786, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03143742233514786, "step": 185 }, { "calibration/aurc": 0.24269278070177402, "calibration/batch_distribution_entropy": 0.6160343742853823, "calibration/buffer_distribution_entropy": 0.7586620888676098, "calibration/confidence_entropy": 0.18859843739186988, "calibration/coverage@0%": 0.054296875, "calibration/coverage@1%": 0.054296875, "calibration/coverage@10%": 0.3044653799019608, "calibration/coverage@15%": 0.438109681372549, "calibration/coverage@20%": 0.5072886029411764, "calibration/coverage@25%": 0.5827129289215687, "calibration/coverage@30%": 0.6284742647058824, "calibration/coverage@5%": 0.105859375, "calibration/ece": 0.1277596892426967, "calibration/mean_confidence": 0.4764249202730988, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000390625, "completions/max_length": 485.4, "completions/max_terminated_length": 485.4, "completions/mean_length": 191.13359375, "completions/mean_terminated_length": 191.20944213867188, "completions/min_length": 36.4, "completions/min_terminated_length": 94.8, "epoch": 0.608, "grad_norm": 0.0018507551867514849, "learning_rate": 1e-06, "loss": 0.0002, "num_tokens": 642960396.0, "reward": 1.2837910652160645, "reward_std": 0.13548575043678285, "rewards/accuracy_reward": 0.4826171875, "rewards/batch_coverage_0": 0.5999539256095886, "rewards/batch_coverage_1": 0.5999539256095886, "rewards/batch_coverage_10": 0.6299304246902466, "rewards/batch_coverage_15": 0.6342156767845154, "rewards/batch_coverage_20": 0.641483461856842, "rewards/batch_coverage_25": 0.645661735534668, "rewards/batch_coverage_5": 0.6170846819877625, "rewards/brier_reward": 0.8336545467376709, "rewards/confidence_uniqueness_reward": 0.8381247282028198, "rewards/format_reward": 0.999609375, "rewards/frontier_aurc_reward": -0.0023502955213189126, "rewards/frontier_ece_reward": 0.016457681730389594, "rewards/frontier_entropy_batch_reward": -0.6294492602348327, "signal/accuracy_reward/centered_abs_mean": 0.0827392578125, "signal/accuracy_reward/group_std_mean": 0.10903512537479401, "signal/accuracy_reward/group_zero_std_frac": 0.690625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04136962890625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04136962890625, "signal/advantage_abs_mean": 0.10070695728063583, "signal/advantage_pre_scale_abs_mean": 0.10070695728063583, "signal/advantage_pre_scale_std": 0.17648605406284332, "signal/advantage_std": 0.17648605406284332, "signal/batch_coverage_0/centered_abs_mean": 0.12757147401571273, "signal/batch_coverage_0/group_std_mean": 0.17173427641391753, "signal/batch_coverage_0/group_zero_std_frac": 0.021875, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.01275714747607708, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.01275714747607708, "signal/batch_coverage_1/centered_abs_mean": 0.12757147401571273, "signal/batch_coverage_1/group_std_mean": 0.17173427641391753, "signal/batch_coverage_1/group_zero_std_frac": 0.021875, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.01275714747607708, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.01275714747607708, "signal/batch_coverage_10/centered_abs_mean": 0.13172535002231597, "signal/batch_coverage_10/group_std_mean": 0.17821555435657502, "signal/batch_coverage_10/group_zero_std_frac": 0.021875, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.01317253541201353, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.01317253541201353, "signal/batch_coverage_15/centered_abs_mean": 0.13351904302835466, "signal/batch_coverage_15/group_std_mean": 0.18080573976039888, "signal/batch_coverage_15/group_zero_std_frac": 0.021875, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.013351904228329659, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.013351904228329659, "signal/batch_coverage_20/centered_abs_mean": 0.13711758852005004, "signal/batch_coverage_20/group_std_mean": 0.18629833459854125, "signal/batch_coverage_20/group_zero_std_frac": 0.021875, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.013711759075522422, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.013711759075522422, "signal/batch_coverage_25/centered_abs_mean": 0.1425417572259903, "signal/batch_coverage_25/group_std_mean": 0.19299682080745698, "signal/batch_coverage_25/group_zero_std_frac": 0.021875, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.014254176057875156, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.014254176057875156, "signal/batch_coverage_5/centered_abs_mean": 0.1316314160823822, "signal/batch_coverage_5/group_std_mean": 0.17660426199436188, "signal/batch_coverage_5/group_zero_std_frac": 0.021875, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.0131631413474679, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.0131631413474679, "signal/brier_reward/centered_abs_mean": 0.1148117184638977, "signal/brier_reward/group_std_mean": 0.15035415887832643, "signal/brier_reward/group_zero_std_frac": 0.021875, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01148117184638977, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01148117184638977, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.07413349598646164, "signal/confidence_uniqueness_reward/group_std_mean": 0.0926214724779129, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.021875, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0074133495800197124, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0074133495800197124, "signal/format_reward/centered_abs_mean": 0.00074462890625, "signal/format_reward/group_std_mean": 0.0018734002020210027, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000372314453125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000372314453125, "signal/frontier_aurc_reward/centered_abs_mean": 0.0014779501361772418, "signal/frontier_aurc_reward/group_std_mean": 0.002129552699625492, "signal/frontier_aurc_reward/group_zero_std_frac": 0.003125, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.8474376884114462e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.8474376884114462e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.014195657894015312, "signal/frontier_ece_reward/group_std_mean": 0.020859728381037713, "signal/frontier_ece_reward/group_zero_std_frac": 0.021875, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0014195657800883054, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0014195657800883054, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3070155918598175, "signal/frontier_entropy_batch_reward/group_std_mean": 0.38327420949935914, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.1, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0307015597820282, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0307015597820282, "step": 190 }, { "calibration/aurc": 0.22339887578048012, "calibration/batch_distribution_entropy": 0.7031810388951124, "calibration/buffer_distribution_entropy": 0.7558727835947326, "calibration/confidence_entropy": 0.23025108654477772, "calibration/coverage@0%": 0.0171875, "calibration/coverage@1%": 0.0171875, "calibration/coverage@10%": 0.3052164872798434, "calibration/coverage@15%": 0.42831993028375737, "calibration/coverage@20%": 0.47638973825831704, "calibration/coverage@25%": 0.6150952482876713, "calibration/coverage@30%": 0.6717610995596869, "calibration/coverage@5%": 0.17548235689823874, "calibration/ece": 0.13616244771925584, "calibration/mean_confidence": 0.5098264604818323, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 504.2, "completions/max_terminated_length": 504.2, "completions/mean_length": 196.30712890625, "completions/mean_terminated_length": 196.3457824707031, "completions/min_length": 57.4, "completions/min_terminated_length": 92.6, "epoch": 0.624, "grad_norm": 0.0016954562161117792, "learning_rate": 1e-06, "loss": 0.0007, "num_tokens": 660314485.0, "reward": 1.278175687789917, "reward_std": 0.1583867758512497, "rewards/accuracy_reward": 0.507421875, "rewards/batch_coverage_0": 0.5575575470924378, "rewards/batch_coverage_1": 0.5575575470924378, "rewards/batch_coverage_10": 0.596357774734497, "rewards/batch_coverage_15": 0.6038527488708496, "rewards/batch_coverage_20": 0.6089828252792359, "rewards/batch_coverage_25": 0.6120626330375671, "rewards/batch_coverage_5": 0.5829194903373718, "rewards/brier_reward": 0.8186290860176086, "rewards/confidence_uniqueness_reward": 0.8801854848861694, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.0023375329561531543, "rewards/frontier_ece_reward": 0.015244904533028602, "rewards/frontier_entropy_batch_reward": -0.5869454622268677, "signal/accuracy_reward/centered_abs_mean": 0.084375, "signal/accuracy_reward/group_std_mean": 0.1125134527683258, "signal/accuracy_reward/group_zero_std_frac": 0.675, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0421875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0421875, "signal/advantage_abs_mean": 0.1217761904001236, "signal/advantage_pre_scale_abs_mean": 0.1217761904001236, "signal/advantage_pre_scale_std": 0.19559427797794343, "signal/advantage_std": 0.19559427797794343, "signal/batch_coverage_0/centered_abs_mean": 0.15701129138469697, "signal/batch_coverage_0/group_std_mean": 0.20394995510578157, "signal/batch_coverage_0/group_zero_std_frac": 0.003125, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.015701129287481307, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.015701129287481307, "signal/batch_coverage_1/centered_abs_mean": 0.15701129138469697, "signal/batch_coverage_1/group_std_mean": 0.20394995510578157, "signal/batch_coverage_1/group_zero_std_frac": 0.003125, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.015701129287481307, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.015701129287481307, "signal/batch_coverage_10/centered_abs_mean": 0.16522761583328247, "signal/batch_coverage_10/group_std_mean": 0.21434899270534516, "signal/batch_coverage_10/group_zero_std_frac": 0.003125, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.016522761806845664, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.016522761806845664, "signal/batch_coverage_15/centered_abs_mean": 0.16934556365013123, "signal/batch_coverage_15/group_std_mean": 0.21964602470397948, "signal/batch_coverage_15/group_zero_std_frac": 0.003125, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.016934556514024736, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.016934556514024736, "signal/batch_coverage_20/centered_abs_mean": 0.17093099951744078, "signal/batch_coverage_20/group_std_mean": 0.2225628077983856, "signal/batch_coverage_20/group_zero_std_frac": 0.003125, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.017093100026249884, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.017093100026249884, "signal/batch_coverage_25/centered_abs_mean": 0.17006333768367768, "signal/batch_coverage_25/group_std_mean": 0.22242553234100343, "signal/batch_coverage_25/group_zero_std_frac": 0.003125, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.01700633317232132, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.01700633317232132, "signal/batch_coverage_5/centered_abs_mean": 0.1607960045337677, "signal/batch_coverage_5/group_std_mean": 0.2090065598487854, "signal/batch_coverage_5/group_zero_std_frac": 0.003125, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.016079600527882577, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.016079600527882577, "signal/brier_reward/centered_abs_mean": 0.1368030786514282, "signal/brier_reward/group_std_mean": 0.1751306027173996, "signal/brier_reward/group_zero_std_frac": 0.003125, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013680307939648629, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013680307939648629, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.05568226352334023, "signal/confidence_uniqueness_reward/group_std_mean": 0.07030727565288544, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.003125, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005568226426839828, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005568226426839828, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.0015239943517372013, "signal/frontier_aurc_reward/group_std_mean": 0.0022282961290329695, "signal/frontier_aurc_reward/group_zero_std_frac": 0.003125, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.9049930051551202e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.9049930051551202e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.015793051198124887, "signal/frontier_ece_reward/group_std_mean": 0.022042370960116388, "signal/frontier_ece_reward/group_zero_std_frac": 0.00625, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0015793051803484558, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0015793051803484558, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3185978889465332, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4003509938716888, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.059375, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.031859788671135904, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.031859788671135904, "step": 195 }, { "calibration/aurc": 0.25818907579779654, "calibration/batch_distribution_entropy": 0.6443005343910162, "calibration/buffer_distribution_entropy": 0.7526658672700117, "calibration/confidence_entropy": 0.21598953797233658, "calibration/coverage@0%": 0.023091976516634048, "calibration/coverage@1%": 0.023091976516634048, "calibration/coverage@10%": 0.14755381604696674, "calibration/coverage@15%": 0.3197651663405088, "calibration/coverage@20%": 0.4682699363992172, "calibration/coverage@25%": 0.5401923312133072, "calibration/coverage@30%": 0.6648116438356164, "calibration/coverage@5%": 0.1365949119373777, "calibration/ece": 0.159088284084341, "calibration/mean_confidence": 0.6250040115878907, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00068359375, "completions/max_length": 575.8, "completions/max_terminated_length": 575.8, "completions/mean_length": 193.503125, "completions/mean_terminated_length": 193.63548889160157, "completions/min_length": 18.2, "completions/min_terminated_length": 91.8, "epoch": 0.64, "grad_norm": 0.0019849766977131367, "learning_rate": 1e-06, "loss": -0.0007, "num_tokens": 677638645.0, "reward": 1.3034381151199341, "reward_std": 0.14825326800346375, "rewards/accuracy_reward": 0.5529296875, "rewards/batch_coverage_0": 0.5572910845279694, "rewards/batch_coverage_1": 0.5572910845279694, "rewards/batch_coverage_10": 0.6068256497383118, "rewards/batch_coverage_15": 0.6149711847305298, "rewards/batch_coverage_20": 0.6288576126098633, "rewards/batch_coverage_25": 0.6334639191627502, "rewards/batch_coverage_5": 0.5896883249282837, "rewards/brier_reward": 0.8173499464988708, "rewards/confidence_uniqueness_reward": 0.8553975939750671, "rewards/format_reward": 0.99931640625, "rewards/frontier_aurc_reward": -0.002552823629230261, "rewards/frontier_ece_reward": 0.016957807727158068, "rewards/frontier_entropy_batch_reward": -0.604624617099762, "signal/accuracy_reward/centered_abs_mean": 0.07900390625, "signal/accuracy_reward/group_std_mean": 0.10646929293870926, "signal/accuracy_reward/group_zero_std_frac": 0.6875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.039501953125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.039501953125, "signal/advantage_abs_mean": 0.11008306294679641, "signal/advantage_pre_scale_abs_mean": 0.11008306294679641, "signal/advantage_pre_scale_std": 0.18596419095993041, "signal/advantage_std": 0.18596419095993041, "signal/batch_coverage_0/centered_abs_mean": 0.12932612597942353, "signal/batch_coverage_0/group_std_mean": 0.17381446361541747, "signal/batch_coverage_0/group_zero_std_frac": 0.00625, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.012932613119482994, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.012932613119482994, "signal/batch_coverage_1/centered_abs_mean": 0.12932612597942353, "signal/batch_coverage_1/group_std_mean": 0.17381446361541747, "signal/batch_coverage_1/group_zero_std_frac": 0.00625, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.012932613119482994, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.012932613119482994, "signal/batch_coverage_10/centered_abs_mean": 0.1382578879594803, "signal/batch_coverage_10/group_std_mean": 0.18876586258411407, "signal/batch_coverage_10/group_zero_std_frac": 0.00625, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.013825790025293827, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.013825790025293827, "signal/batch_coverage_15/centered_abs_mean": 0.1413852721452713, "signal/batch_coverage_15/group_std_mean": 0.19356752038002015, "signal/batch_coverage_15/group_zero_std_frac": 0.00625, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.01413852721452713, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.01413852721452713, "signal/batch_coverage_20/centered_abs_mean": 0.14702281653881072, "signal/batch_coverage_20/group_std_mean": 0.2009480744600296, "signal/batch_coverage_20/group_zero_std_frac": 0.00625, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.01470228172838688, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.01470228172838688, "signal/batch_coverage_25/centered_abs_mean": 0.14943260848522186, "signal/batch_coverage_25/group_std_mean": 0.20391323864459993, "signal/batch_coverage_25/group_zero_std_frac": 0.00625, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.01494326014071703, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.01494326014071703, "signal/batch_coverage_5/centered_abs_mean": 0.13566993772983552, "signal/batch_coverage_5/group_std_mean": 0.1848902851343155, "signal/batch_coverage_5/group_zero_std_frac": 0.00625, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.013566994294524192, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.013566994294524192, "signal/brier_reward/centered_abs_mean": 0.1233207032084465, "signal/brier_reward/group_std_mean": 0.16159166097640992, "signal/brier_reward/group_zero_std_frac": 0.00625, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012332070805132388, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012332070805132388, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.07103251814842224, "signal/confidence_uniqueness_reward/group_std_mean": 0.09032833874225617, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.00625, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007103251945227385, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007103251945227385, "signal/format_reward/centered_abs_mean": 0.001251220703125, "signal/format_reward/group_std_mean": 0.0027073150966316463, "signal/format_reward/group_zero_std_frac": 0.9875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006256103515625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0006256103515625, "signal/frontier_aurc_reward/centered_abs_mean": 0.002036643889732659, "signal/frontier_aurc_reward/group_std_mean": 0.002958608232438564, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.545804818510078e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.545804818510078e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.014836767874658107, "signal/frontier_ece_reward/group_std_mean": 0.02108708433806896, "signal/frontier_ece_reward/group_zero_std_frac": 0.015625, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001483676815405488, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001483676815405488, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.30819383859634397, "signal/frontier_entropy_batch_reward/group_std_mean": 0.38484176993370056, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.109375, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0308193851262331, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0308193851262331, "step": 200 }, { "epoch": 0.64, "eval_calibration/aurc": 0.45066771849639575, "eval_calibration/batch_distribution_entropy": 0.6308715104303002, "eval_calibration/buffer_distribution_entropy": 0.7501602692465891, "eval_calibration/confidence_entropy": 0.23284911409052508, "eval_calibration/coverage@0%": 0.0, "eval_calibration/coverage@1%": 0.0, "eval_calibration/coverage@10%": 0.0, "eval_calibration/coverage@15%": 0.0, "eval_calibration/coverage@20%": 0.125, "eval_calibration/coverage@25%": 0.28125, "eval_calibration/coverage@30%": 0.3125, "eval_calibration/coverage@5%": 0.0, "eval_calibration/ece": 0.2192780273711064, "eval_calibration/mean_confidence": 0.5297764255255403, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 365.5, "eval_completions/max_terminated_length": 365.5, "eval_completions/mean_length": 198.55636978149414, "eval_completions/mean_terminated_length": 198.55636978149414, "eval_completions/min_length": 105.5, "eval_completions/min_terminated_length": 105.5, "eval_loss": 0.0, "eval_num_tokens": 677638645.0, "eval_reward": 0.9594081491231918, "eval_reward_std": 0.2795492261648178, "eval_rewards/accuracy_reward": 0.400390625, "eval_rewards/batch_coverage_0": 0.2906555011868477, "eval_rewards/batch_coverage_1": 0.2906555011868477, "eval_rewards/batch_coverage_10": 0.2898253872990608, "eval_rewards/batch_coverage_15": 0.28895024210214615, "eval_rewards/batch_coverage_20": 0.269227497279644, "eval_rewards/batch_coverage_25": 0.25441256538033485, "eval_rewards/batch_coverage_5": 0.2906555011868477, "eval_rewards/brier_reward": 0.7779050469398499, "eval_rewards/confidence_uniqueness_reward": 0.82861328125, "eval_rewards/format_reward": 1.0, "eval_rewards/frontier_aurc_reward": -0.003932581515982747, "eval_rewards/frontier_ece_reward": 0.011719705304130912, "eval_rewards/frontier_entropy_batch_reward": -1.0, "eval_runtime": 20.7013, "eval_samples_per_second": 24.153, "eval_signal/accuracy_reward/centered_abs_mean": 0.4666748046875, "eval_signal/accuracy_reward/group_std_mean": 0.490452878177166, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23333740234375, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23333740234375, "eval_signal/advantage_abs_mean": 0.2340683601796627, "eval_signal/advantage_pre_scale_abs_mean": 0.2340683601796627, "eval_signal/advantage_pre_scale_std": 0.2773290351033211, "eval_signal/advantage_std": 0.2773290351033211, "eval_signal/batch_coverage_0/centered_abs_mean": 0.42698580026626587, "eval_signal/batch_coverage_0/group_std_mean": 0.4923050254583359, "eval_signal/batch_coverage_0/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.04269858077168465, "eval_signal/batch_coverage_0/weight": 0.10000000149011612, "eval_signal/batch_coverage_0/weighted_centered_abs_mean": 0.04269858077168465, "eval_signal/batch_coverage_1/centered_abs_mean": 0.42698580026626587, "eval_signal/batch_coverage_1/group_std_mean": 0.4923050254583359, "eval_signal/batch_coverage_1/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.04269858077168465, "eval_signal/batch_coverage_1/weight": 0.10000000149011612, "eval_signal/batch_coverage_1/weighted_centered_abs_mean": 0.04269858077168465, "eval_signal/batch_coverage_10/centered_abs_mean": 0.42510540038347244, "eval_signal/batch_coverage_10/group_std_mean": 0.49010204523801804, "eval_signal/batch_coverage_10/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.042510541155934334, "eval_signal/batch_coverage_10/weight": 0.10000000149011612, "eval_signal/batch_coverage_10/weighted_centered_abs_mean": 0.042510541155934334, "eval_signal/batch_coverage_15/centered_abs_mean": 0.4231926202774048, "eval_signal/batch_coverage_15/group_std_mean": 0.4875435531139374, "eval_signal/batch_coverage_15/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.04231926240026951, "eval_signal/batch_coverage_15/weight": 0.10000000149011612, "eval_signal/batch_coverage_15/weighted_centered_abs_mean": 0.04231926240026951, "eval_signal/batch_coverage_20/centered_abs_mean": 0.3872489780187607, "eval_signal/batch_coverage_20/group_std_mean": 0.4467836171388626, "eval_signal/batch_coverage_20/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.03872489836066961, "eval_signal/batch_coverage_20/weight": 0.10000000149011612, "eval_signal/batch_coverage_20/weighted_centered_abs_mean": 0.03872489836066961, "eval_signal/batch_coverage_25/centered_abs_mean": 0.36741621047258377, "eval_signal/batch_coverage_25/group_std_mean": 0.4262534826993942, "eval_signal/batch_coverage_25/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.036741622257977724, "eval_signal/batch_coverage_25/weight": 0.10000000149011612, "eval_signal/batch_coverage_25/weighted_centered_abs_mean": 0.036741622257977724, "eval_signal/batch_coverage_5/centered_abs_mean": 0.42698580026626587, "eval_signal/batch_coverage_5/group_std_mean": 0.4923050254583359, "eval_signal/batch_coverage_5/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.04269858077168465, "eval_signal/batch_coverage_5/weight": 0.10000000149011612, "eval_signal/batch_coverage_5/weighted_centered_abs_mean": 0.04269858077168465, "eval_signal/brier_reward/centered_abs_mean": 0.2969773858785629, "eval_signal/brier_reward/group_std_mean": 0.3588665947318077, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.029697738122195005, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.029697738122195005, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0925445556640625, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.10798590630292892, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00925445614848286, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00925445614848286, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.004777178633958101, "eval_signal/frontier_aurc_reward/group_std_mean": 0.008755038492381573, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.9714732742577326e-05, "eval_signal/frontier_aurc_reward/weight": 0.012500000186264515, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.9714732742577326e-05, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.026464423164725304, "eval_signal/frontier_ece_reward/group_std_mean": 0.03283689636737108, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0026464423281140625, "eval_signal/frontier_ece_reward/weight": 0.10000000149011612, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0026464423281140625, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0, "eval_steps_per_second": 0.193, "step": 200 }, { "calibration/aurc": 0.436145941498868, "calibration/batch_distribution_entropy": 0.7614577097539426, "calibration/buffer_distribution_entropy": 0.7494956520521233, "calibration/confidence_entropy": 0.2691995055392518, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.105859375, "calibration/coverage@25%": 0.16328125, "calibration/coverage@30%": 0.283203125, "calibration/coverage@5%": 0.0, "calibration/ece": 0.20043935589572012, "calibration/mean_confidence": 0.5348283324646669, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 510.6, "completions/max_terminated_length": 510.6, "completions/mean_length": 195.7046875, "completions/mean_terminated_length": 195.7046875, "completions/min_length": 96.0, "completions/min_terminated_length": 96.0, "epoch": 0.656, "grad_norm": 0.0017912992043420672, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 694499205.0, "reward": 1.2205629348754883, "reward_std": 0.15728234946727754, "rewards/accuracy_reward": 0.46318359375, "rewards/batch_coverage_0": 0.5111707746982574, "rewards/batch_coverage_1": 0.5111707746982574, "rewards/batch_coverage_10": 0.5461826920509338, "rewards/batch_coverage_15": 0.5565583229064941, "rewards/batch_coverage_20": 0.5595410823822021, "rewards/batch_coverage_25": 0.5592182993888855, "rewards/batch_coverage_5": 0.5302507996559143, "rewards/brier_reward": 0.7698781847953796, "rewards/confidence_uniqueness_reward": 0.8930137634277344, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.003443529363721609, "rewards/frontier_ece_reward": 0.011358432192355394, "rewards/frontier_entropy_batch_reward": -0.558201253414154, "signal/accuracy_reward/centered_abs_mean": 0.085833740234375, "signal/accuracy_reward/group_std_mean": 0.1159292846918106, "signal/accuracy_reward/group_zero_std_frac": 0.659375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0429168701171875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0429168701171875, "signal/advantage_abs_mean": 0.11985027641057969, "signal/advantage_pre_scale_abs_mean": 0.11985027641057969, "signal/advantage_pre_scale_std": 0.19015521705150604, "signal/advantage_std": 0.19015521705150604, "signal/batch_coverage_0/centered_abs_mean": 0.13516520261764525, "signal/batch_coverage_0/group_std_mean": 0.18049061596393584, "signal/batch_coverage_0/group_zero_std_frac": 0.009375, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.013516520708799362, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.013516520708799362, "signal/batch_coverage_1/centered_abs_mean": 0.13516520261764525, "signal/batch_coverage_1/group_std_mean": 0.18049061596393584, "signal/batch_coverage_1/group_zero_std_frac": 0.009375, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.013516520708799362, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.013516520708799362, "signal/batch_coverage_10/centered_abs_mean": 0.1454785704612732, "signal/batch_coverage_10/group_std_mean": 0.19473540186882018, "signal/batch_coverage_10/group_zero_std_frac": 0.009375, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.014547857455909252, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.014547857455909252, "signal/batch_coverage_15/centered_abs_mean": 0.15078432261943817, "signal/batch_coverage_15/group_std_mean": 0.20216354429721833, "signal/batch_coverage_15/group_zero_std_frac": 0.009375, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.015078432485461235, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.015078432485461235, "signal/batch_coverage_20/centered_abs_mean": 0.15291909277439117, "signal/batch_coverage_20/group_std_mean": 0.20492708683013916, "signal/batch_coverage_20/group_zero_std_frac": 0.009375, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.015291909500956536, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.015291909500956536, "signal/batch_coverage_25/centered_abs_mean": 0.15271320343017578, "signal/batch_coverage_25/group_std_mean": 0.20409930348396302, "signal/batch_coverage_25/group_zero_std_frac": 0.009375, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.015271320939064026, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.015271320939064026, "signal/batch_coverage_5/centered_abs_mean": 0.1402449667453766, "signal/batch_coverage_5/group_std_mean": 0.18734357357025147, "signal/batch_coverage_5/group_zero_std_frac": 0.009375, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.0140244971960783, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.0140244971960783, "signal/brier_reward/centered_abs_mean": 0.1363556757569313, "signal/brier_reward/group_std_mean": 0.17585920095443724, "signal/brier_reward/group_zero_std_frac": 0.009375, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01363556794822216, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01363556794822216, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04828674793243408, "signal/confidence_uniqueness_reward/group_std_mean": 0.060622844845056534, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.009375, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004828674811869859, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004828674811869859, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.002189541654661298, "signal/frontier_aurc_reward/group_std_mean": 0.0030809998977929355, "signal/frontier_aurc_reward/group_zero_std_frac": 0.00625, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.736927053774707e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.736927053774707e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.016461933963000774, "signal/frontier_ece_reward/group_std_mean": 0.023596494644880294, "signal/frontier_ece_reward/group_zero_std_frac": 0.0125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0016461933497339486, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0016461933497339486, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3272219479084015, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4061711668968201, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.06875, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.032722195237874986, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032722195237874986, "step": 205 }, { "calibration/aurc": 0.27816703918639335, "calibration/batch_distribution_entropy": 0.6903424523444357, "calibration/buffer_distribution_entropy": 0.7495368119155803, "calibration/confidence_entropy": 0.2347079734115539, "calibration/coverage@0%": 0.05390625, "calibration/coverage@1%": 0.076953125, "calibration/coverage@10%": 0.1948452818627451, "calibration/coverage@15%": 0.29333180147058824, "calibration/coverage@20%": 0.36292126225490196, "calibration/coverage@25%": 0.411812193627451, "calibration/coverage@30%": 0.47671109068627454, "calibration/coverage@5%": 0.14984987745098038, "calibration/ece": 0.1697572193501926, "calibration/mean_confidence": 0.5357356714814479, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 553.0, "completions/max_terminated_length": 553.0, "completions/mean_length": 197.2208984375, "completions/mean_terminated_length": 197.27909240722656, "completions/min_length": 55.6, "completions/min_terminated_length": 91.0, "epoch": 0.672, "grad_norm": 0.001681040390394628, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 711432187.0, "reward": 1.264276671409607, "reward_std": 0.14050707817077637, "rewards/accuracy_reward": 0.496875, "rewards/batch_coverage_0": 0.5329373478889465, "rewards/batch_coverage_1": 0.5329373478889465, "rewards/batch_coverage_10": 0.5927770495414734, "rewards/batch_coverage_15": 0.6036754965782165, "rewards/batch_coverage_20": 0.6099525451660156, "rewards/batch_coverage_25": 0.6132030963897706, "rewards/batch_coverage_5": 0.5753331422805786, "rewards/brier_reward": 0.8148468971252442, "rewards/confidence_uniqueness_reward": 0.872640061378479, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.002430691896006465, "rewards/frontier_ece_reward": 0.014318699017167091, "rewards/frontier_entropy_batch_reward": -0.6024617195129395, "signal/accuracy_reward/centered_abs_mean": 0.08050537109375, "signal/accuracy_reward/group_std_mean": 0.11048125326633454, "signal/accuracy_reward/group_zero_std_frac": 0.66875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.040252685546875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.040252685546875, "signal/advantage_abs_mean": 0.10363083332777023, "signal/advantage_pre_scale_abs_mean": 0.10363083332777023, "signal/advantage_pre_scale_std": 0.17971519827842714, "signal/advantage_std": 0.17971519827842714, "signal/batch_coverage_0/centered_abs_mean": 0.12535898834466935, "signal/batch_coverage_0/group_std_mean": 0.16480962038040162, "signal/batch_coverage_0/group_zero_std_frac": 0.003125, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.012535898946225643, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.012535898946225643, "signal/batch_coverage_1/centered_abs_mean": 0.12535898834466935, "signal/batch_coverage_1/group_std_mean": 0.16480962038040162, "signal/batch_coverage_1/group_zero_std_frac": 0.003125, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.012535898946225643, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.012535898946225643, "signal/batch_coverage_10/centered_abs_mean": 0.13579677045345306, "signal/batch_coverage_10/group_std_mean": 0.1822267711162567, "signal/batch_coverage_10/group_zero_std_frac": 0.003125, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.01357967797666788, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.01357967797666788, "signal/batch_coverage_15/centered_abs_mean": 0.13899961709976197, "signal/batch_coverage_15/group_std_mean": 0.1863336056470871, "signal/batch_coverage_15/group_zero_std_frac": 0.003125, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.013899962231516839, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.013899962231516839, "signal/batch_coverage_20/centered_abs_mean": 0.1399351730942726, "signal/batch_coverage_20/group_std_mean": 0.18823565542697906, "signal/batch_coverage_20/group_zero_std_frac": 0.003125, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.01399351805448532, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.01399351805448532, "signal/batch_coverage_25/centered_abs_mean": 0.14212610721588134, "signal/batch_coverage_25/group_std_mean": 0.19051645696163177, "signal/batch_coverage_25/group_zero_std_frac": 0.003125, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.014212611131370068, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.014212611131370068, "signal/batch_coverage_5/centered_abs_mean": 0.13201508820056915, "signal/batch_coverage_5/group_std_mean": 0.17619396150112152, "signal/batch_coverage_5/group_zero_std_frac": 0.003125, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.013201508484780789, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.013201508484780789, "signal/brier_reward/centered_abs_mean": 0.11381059437990189, "signal/brier_reward/group_std_mean": 0.14819636046886445, "signal/brier_reward/group_zero_std_frac": 0.003125, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011381059512495994, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011381059512495994, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.054254206269979476, "signal/confidence_uniqueness_reward/group_std_mean": 0.06855006217956543, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.003125, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005425420589745045, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005425420589745045, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.0013720685848966241, "signal/frontier_aurc_reward/group_std_mean": 0.0019520026398822665, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.7150857092929073e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.7150857092929073e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.013757929392158984, "signal/frontier_ece_reward/group_std_mean": 0.019465847685933114, "signal/frontier_ece_reward/group_zero_std_frac": 0.003125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0013757929671555757, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0013757929671555757, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3010860621929169, "signal/frontier_entropy_batch_reward/group_std_mean": 0.37441075444221494, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.096875, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.030108605325222016, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.030108605325222016, "step": 210 }, { "calibration/aurc": 0.3334160486367128, "calibration/batch_distribution_entropy": 0.7490269037549024, "calibration/buffer_distribution_entropy": 0.7462465371181635, "calibration/confidence_entropy": 0.269153724018258, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.139453125, "calibration/coverage@15%": 0.2046875, "calibration/coverage@20%": 0.35078125, "calibration/coverage@25%": 0.413671875, "calibration/coverage@30%": 0.467578125, "calibration/coverage@5%": 0.01328125, "calibration/ece": 0.1800994213842444, "calibration/mean_confidence": 0.4994494024658354, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 597.0, "completions/max_terminated_length": 597.0, "completions/mean_length": 203.0525390625, "completions/mean_terminated_length": 203.0525390625, "completions/min_length": 102.4, "completions/min_terminated_length": 102.4, "epoch": 0.688, "grad_norm": 0.0015836784150451422, "learning_rate": 1e-06, "loss": 0.0014, "num_tokens": 728465365.0, "reward": 1.2559720993041992, "reward_std": 0.1384851634502411, "rewards/accuracy_reward": 0.50068359375, "rewards/batch_coverage_0": 0.531680804491043, "rewards/batch_coverage_1": 0.531680804491043, "rewards/batch_coverage_10": 0.573654568195343, "rewards/batch_coverage_15": 0.5778225183486938, "rewards/batch_coverage_20": 0.5828616857528687, "rewards/batch_coverage_25": 0.586078429222107, "rewards/batch_coverage_5": 0.558163869380951, "rewards/brier_reward": 0.8070386648178101, "rewards/confidence_uniqueness_reward": 0.88953857421875, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.0022389247780665754, "rewards/frontier_ece_reward": 0.014052477292716503, "rewards/frontier_entropy_batch_reward": -0.5959893703460694, "signal/accuracy_reward/centered_abs_mean": 0.090765380859375, "signal/accuracy_reward/group_std_mean": 0.11465331614017486, "signal/accuracy_reward/group_zero_std_frac": 0.684375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0453826904296875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0453826904296875, "signal/advantage_abs_mean": 0.10710875988006592, "signal/advantage_pre_scale_abs_mean": 0.10710875988006592, "signal/advantage_pre_scale_std": 0.1759258270263672, "signal/advantage_std": 0.1759258270263672, "signal/batch_coverage_0/centered_abs_mean": 0.12940916717052459, "signal/batch_coverage_0/group_std_mean": 0.1708761364221573, "signal/batch_coverage_0/group_zero_std_frac": 0.003125, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.012940916605293751, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.012940916605293751, "signal/batch_coverage_1/centered_abs_mean": 0.12940916717052459, "signal/batch_coverage_1/group_std_mean": 0.1708761364221573, "signal/batch_coverage_1/group_zero_std_frac": 0.003125, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.012940916605293751, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.012940916605293751, "signal/batch_coverage_10/centered_abs_mean": 0.1361958459019661, "signal/batch_coverage_10/group_std_mean": 0.1809363543987274, "signal/batch_coverage_10/group_zero_std_frac": 0.003125, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.0136195857077837, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.0136195857077837, "signal/batch_coverage_15/centered_abs_mean": 0.13794144690036775, "signal/batch_coverage_15/group_std_mean": 0.18336707949638367, "signal/batch_coverage_15/group_zero_std_frac": 0.003125, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.013794144801795482, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.013794144801795482, "signal/batch_coverage_20/centered_abs_mean": 0.1340498149394989, "signal/batch_coverage_20/group_std_mean": 0.18017881512641906, "signal/batch_coverage_20/group_zero_std_frac": 0.003125, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.013404982350766658, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.013404982350766658, "signal/batch_coverage_25/centered_abs_mean": 0.13671108484268188, "signal/batch_coverage_25/group_std_mean": 0.18310289680957795, "signal/batch_coverage_25/group_zero_std_frac": 0.003125, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.013671108894050122, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.013671108894050122, "signal/batch_coverage_5/centered_abs_mean": 0.13302011638879777, "signal/batch_coverage_5/group_std_mean": 0.17659396231174468, "signal/batch_coverage_5/group_zero_std_frac": 0.003125, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.013302012160420418, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.013302012160420418, "signal/brier_reward/centered_abs_mean": 0.12155969887971878, "signal/brier_reward/group_std_mean": 0.15423001050949098, "signal/brier_reward/group_zero_std_frac": 0.003125, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012155969999730587, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012155969999730587, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.045909452438354495, "signal/confidence_uniqueness_reward/group_std_mean": 0.056747060269117355, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.003125, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004590945364907384, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004590945364907384, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0011943122604861855, "signal/frontier_aurc_reward/group_std_mean": 0.0016959201195277274, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.4928904056432657e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.4928904056432657e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.01512746512889862, "signal/frontier_ece_reward/group_std_mean": 0.021570420265197753, "signal/frontier_ece_reward/group_zero_std_frac": 0.00625, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0015127464896067977, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0015127464896067977, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.29471340775489807, "signal/frontier_entropy_batch_reward/group_std_mean": 0.37298219203948973, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.06875, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02947134114801884, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02947134114801884, "step": 215 }, { "calibration/aurc": 0.2355489229172564, "calibration/batch_distribution_entropy": 0.640870734793577, "calibration/buffer_distribution_entropy": 0.745662741922789, "calibration/confidence_entropy": 0.2047382660438588, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0390625, "calibration/coverage@10%": 0.18203125, "calibration/coverage@15%": 0.29609375, "calibration/coverage@20%": 0.441015625, "calibration/coverage@25%": 0.612109375, "calibration/coverage@30%": 0.72578125, "calibration/coverage@5%": 0.06875, "calibration/ece": 0.14953982855671993, "calibration/mean_confidence": 0.5587629108731182, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 576.4, "completions/max_terminated_length": 576.4, "completions/mean_length": 201.7212890625, "completions/mean_terminated_length": 201.74125061035156, "completions/min_length": 80.0, "completions/min_terminated_length": 99.6, "epoch": 0.704, "grad_norm": 0.0029429446440190077, "learning_rate": 1e-06, "loss": 0.0009, "num_tokens": 745397135.0, "reward": 1.297256064414978, "reward_std": 0.14224921464920043, "rewards/accuracy_reward": 0.5150390625, "rewards/batch_coverage_0": 0.58274827003479, "rewards/batch_coverage_1": 0.58274827003479, "rewards/batch_coverage_10": 0.6211008548736572, "rewards/batch_coverage_15": 0.6245938181877136, "rewards/batch_coverage_20": 0.6274907231330872, "rewards/batch_coverage_25": 0.6309248447418213, "rewards/batch_coverage_5": 0.6062975645065307, "rewards/brier_reward": 0.8248634696006775, "rewards/confidence_uniqueness_reward": 0.8828810691833496, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.0022668347228318453, "rewards/frontier_ece_reward": 0.015720732137560846, "rewards/frontier_entropy_batch_reward": -0.6007444858551025, "signal/accuracy_reward/centered_abs_mean": 0.07073974609375, "signal/accuracy_reward/group_std_mean": 0.09534884691238403, "signal/accuracy_reward/group_zero_std_frac": 0.715625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.035369873046875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.035369873046875, "signal/advantage_abs_mean": 0.10416051000356674, "signal/advantage_pre_scale_abs_mean": 0.10416051000356674, "signal/advantage_pre_scale_std": 0.17625333666801452, "signal/advantage_std": 0.17625333666801452, "signal/batch_coverage_0/centered_abs_mean": 0.13438412249088288, "signal/batch_coverage_0/group_std_mean": 0.18114750385284423, "signal/batch_coverage_0/group_zero_std_frac": 0.009375, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.013438411988317966, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.013438411988317966, "signal/batch_coverage_1/centered_abs_mean": 0.13438412249088288, "signal/batch_coverage_1/group_std_mean": 0.18114750385284423, "signal/batch_coverage_1/group_zero_std_frac": 0.009375, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.013438411988317966, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.013438411988317966, "signal/batch_coverage_10/centered_abs_mean": 0.1422608584165573, "signal/batch_coverage_10/group_std_mean": 0.1935015231370926, "signal/batch_coverage_10/group_zero_std_frac": 0.009375, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.014226085878908635, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.014226085878908635, "signal/batch_coverage_15/centered_abs_mean": 0.14202981293201447, "signal/batch_coverage_15/group_std_mean": 0.19373472929000854, "signal/batch_coverage_15/group_zero_std_frac": 0.009375, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.014202981814742088, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.014202981814742088, "signal/batch_coverage_20/centered_abs_mean": 0.14324666261672975, "signal/batch_coverage_20/group_std_mean": 0.19504115581512452, "signal/batch_coverage_20/group_zero_std_frac": 0.009375, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.014324666187167167, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.014324666187167167, "signal/batch_coverage_25/centered_abs_mean": 0.14737085103988648, "signal/batch_coverage_25/group_std_mean": 0.19997539818286897, "signal/batch_coverage_25/group_zero_std_frac": 0.009375, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.014737085625529289, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.014737085625529289, "signal/batch_coverage_5/centered_abs_mean": 0.1389584869146347, "signal/batch_coverage_5/group_std_mean": 0.18836452662944794, "signal/batch_coverage_5/group_zero_std_frac": 0.009375, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.013895849138498307, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.013895849138498307, "signal/brier_reward/centered_abs_mean": 0.11510290652513504, "signal/brier_reward/group_std_mean": 0.15334579348564148, "signal/brier_reward/group_zero_std_frac": 0.009375, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011510290764272213, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011510290764272213, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.05111872330307961, "signal/confidence_uniqueness_reward/group_std_mean": 0.06499804481863976, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.009375, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005111872497946024, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005111872497946024, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0013043643673881888, "signal/frontier_aurc_reward/group_std_mean": 0.0019197963876649738, "signal/frontier_aurc_reward/group_zero_std_frac": 0.009375, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.6304554083035328e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.6304554083035328e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.014480138197541237, "signal/frontier_ece_reward/group_std_mean": 0.021022016927599908, "signal/frontier_ece_reward/group_zero_std_frac": 0.015625, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0014480138663202525, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0014480138663202525, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3132603347301483, "signal/frontier_entropy_batch_reward/group_std_mean": 0.38880074620246885, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0875, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.031326035782694814, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.031326035782694814, "step": 220 }, { "calibration/aurc": 0.24117854419539175, "calibration/batch_distribution_entropy": 0.6917096147909891, "calibration/buffer_distribution_entropy": 0.7415210871493679, "calibration/confidence_entropy": 0.21799001668587992, "calibration/coverage@0%": 0.038671875, "calibration/coverage@1%": 0.054296875, "calibration/coverage@10%": 0.2015625, "calibration/coverage@15%": 0.2765625, "calibration/coverage@20%": 0.51484375, "calibration/coverage@25%": 0.621875, "calibration/coverage@30%": 0.670703125, "calibration/coverage@5%": 0.142578125, "calibration/ece": 0.1317607727717102, "calibration/mean_confidence": 0.5068727895744845, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 512.4, "completions/max_terminated_length": 512.4, "completions/mean_length": 202.0845703125, "completions/mean_terminated_length": 202.0845703125, "completions/min_length": 95.4, "completions/min_terminated_length": 95.4, "epoch": 0.72, "grad_norm": 0.0018140418687835336, "learning_rate": 1e-06, "loss": 0.0012, "num_tokens": 762476337.0, "reward": 1.2914434671401978, "reward_std": 0.13651327788829803, "rewards/accuracy_reward": 0.533984375, "rewards/batch_coverage_0": 0.5698285698890686, "rewards/batch_coverage_1": 0.5698285698890686, "rewards/batch_coverage_10": 0.6016792893409729, "rewards/batch_coverage_15": 0.6043854236602784, "rewards/batch_coverage_20": 0.608850610256195, "rewards/batch_coverage_25": 0.6127121925354004, "rewards/batch_coverage_5": 0.5894207715988159, "rewards/brier_reward": 0.8136415481567383, "rewards/confidence_uniqueness_reward": 0.8610710144042969, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.00200494471937418, "rewards/frontier_ece_reward": 0.013372968323528767, "rewards/frontier_entropy_batch_reward": -0.600027346611023, "signal/accuracy_reward/centered_abs_mean": 0.08544921875, "signal/accuracy_reward/group_std_mean": 0.10862117111682892, "signal/accuracy_reward/group_zero_std_frac": 0.70625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.042724609375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.042724609375, "signal/advantage_abs_mean": 0.10617989897727967, "signal/advantage_pre_scale_abs_mean": 0.10617989897727967, "signal/advantage_pre_scale_std": 0.17531263828277588, "signal/advantage_std": 0.17531263828277588, "signal/batch_coverage_0/centered_abs_mean": 0.13560049831867219, "signal/batch_coverage_0/group_std_mean": 0.1758313685655594, "signal/batch_coverage_0/group_zero_std_frac": 0.015625, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.013560050167143345, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.013560050167143345, "signal/batch_coverage_1/centered_abs_mean": 0.13560049831867219, "signal/batch_coverage_1/group_std_mean": 0.1758313685655594, "signal/batch_coverage_1/group_zero_std_frac": 0.015625, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.013560050167143345, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.013560050167143345, "signal/batch_coverage_10/centered_abs_mean": 0.13977060616016387, "signal/batch_coverage_10/group_std_mean": 0.1825422078371048, "signal/batch_coverage_10/group_zero_std_frac": 0.015625, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.013977061398327351, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.013977061398327351, "signal/batch_coverage_15/centered_abs_mean": 0.14058802425861358, "signal/batch_coverage_15/group_std_mean": 0.18333434462547302, "signal/batch_coverage_15/group_zero_std_frac": 0.015625, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.014058802835643291, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.014058802835643291, "signal/batch_coverage_20/centered_abs_mean": 0.13900991082191466, "signal/batch_coverage_20/group_std_mean": 0.1814419984817505, "signal/batch_coverage_20/group_zero_std_frac": 0.015625, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.013900990970432759, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.013900990970432759, "signal/batch_coverage_25/centered_abs_mean": 0.1429667666554451, "signal/batch_coverage_25/group_std_mean": 0.186186283826828, "signal/batch_coverage_25/group_zero_std_frac": 0.015625, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.01429667677730322, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.01429667677730322, "signal/batch_coverage_5/centered_abs_mean": 0.13655753284692765, "signal/batch_coverage_5/group_std_mean": 0.17737014293670655, "signal/batch_coverage_5/group_zero_std_frac": 0.015625, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.013655753619968892, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.013655753619968892, "signal/brier_reward/centered_abs_mean": 0.12080991268157959, "signal/brier_reward/group_std_mean": 0.15258357524871827, "signal/brier_reward/group_zero_std_frac": 0.015625, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012080991454422475, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012080991454422475, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.05788230895996094, "signal/confidence_uniqueness_reward/group_std_mean": 0.07352784276008606, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.015625, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005788230989128351, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005788230989128351, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.001230324897915125, "signal/frontier_aurc_reward/group_std_mean": 0.0017709915526211261, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.5379060823761392e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.5379060823761392e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.01474948674440384, "signal/frontier_ece_reward/group_std_mean": 0.020724740251898764, "signal/frontier_ece_reward/group_zero_std_frac": 0.021875, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001474948669783771, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001474948669783771, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.29659512639045715, "signal/frontier_entropy_batch_reward/group_std_mean": 0.36827229857444765, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.11875, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.029659513384103775, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.029659513384103775, "step": 225 }, { "calibration/aurc": 0.2523398918321875, "calibration/batch_distribution_entropy": 0.7061118258311535, "calibration/buffer_distribution_entropy": 0.7388043538380987, "calibration/confidence_entropy": 0.22098310735812107, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.050390625, "calibration/coverage@10%": 0.0875, "calibration/coverage@15%": 0.3453125, "calibration/coverage@20%": 0.467578125, "calibration/coverage@25%": 0.596875, "calibration/coverage@30%": 0.673828125, "calibration/coverage@5%": 0.073046875, "calibration/ece": 0.17879724224531168, "calibration/mean_confidence": 0.6009326217256324, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 518.2, "completions/max_terminated_length": 518.2, "completions/mean_length": 197.7541015625, "completions/mean_terminated_length": 197.8127685546875, "completions/min_length": 54.8, "completions/min_terminated_length": 93.4, "epoch": 0.736, "grad_norm": 0.0016909514088183641, "learning_rate": 1e-06, "loss": 0.0001, "num_tokens": 779440923.0, "reward": 1.2925546407699584, "reward_std": 0.1415196657180786, "rewards/accuracy_reward": 0.54072265625, "rewards/batch_coverage_0": 0.5608146786689758, "rewards/batch_coverage_1": 0.5608146786689758, "rewards/batch_coverage_10": 0.594396460056305, "rewards/batch_coverage_15": 0.5996437549591065, "rewards/batch_coverage_20": 0.6081606268882751, "rewards/batch_coverage_25": 0.6098075866699219, "rewards/batch_coverage_5": 0.5855140924453736, "rewards/brier_reward": 0.8093111157417298, "rewards/confidence_uniqueness_reward": 0.8509040236473083, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.0025167773477733136, "rewards/frontier_ece_reward": 0.013303298316895962, "rewards/frontier_entropy_batch_reward": -0.5689576864242554, "signal/accuracy_reward/centered_abs_mean": 0.080865478515625, "signal/accuracy_reward/group_std_mean": 0.10603559166193008, "signal/accuracy_reward/group_zero_std_frac": 0.7, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0404327392578125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0404327392578125, "signal/advantage_abs_mean": 0.1061436727643013, "signal/advantage_pre_scale_abs_mean": 0.1061436727643013, "signal/advantage_pre_scale_std": 0.1785441070795059, "signal/advantage_std": 0.1785441070795059, "signal/batch_coverage_0/centered_abs_mean": 0.12894578129053116, "signal/batch_coverage_0/group_std_mean": 0.16901729106903077, "signal/batch_coverage_0/group_zero_std_frac": 0.00625, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.012894578650593758, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.012894578650593758, "signal/batch_coverage_1/centered_abs_mean": 0.12894578129053116, "signal/batch_coverage_1/group_std_mean": 0.16901729106903077, "signal/batch_coverage_1/group_zero_std_frac": 0.00625, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.012894578650593758, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.012894578650593758, "signal/batch_coverage_10/centered_abs_mean": 0.13518355786800385, "signal/batch_coverage_10/group_std_mean": 0.1799877792596817, "signal/batch_coverage_10/group_zero_std_frac": 0.00625, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.013518355973064899, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.013518355973064899, "signal/batch_coverage_15/centered_abs_mean": 0.13510996997356414, "signal/batch_coverage_15/group_std_mean": 0.18045617043972015, "signal/batch_coverage_15/group_zero_std_frac": 0.00625, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.013510997965931892, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.013510997965931892, "signal/batch_coverage_20/centered_abs_mean": 0.1411931872367859, "signal/batch_coverage_20/group_std_mean": 0.18840061128139496, "signal/batch_coverage_20/group_zero_std_frac": 0.00625, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.014119319431483746, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.014119319431483746, "signal/batch_coverage_25/centered_abs_mean": 0.14360681772232056, "signal/batch_coverage_25/group_std_mean": 0.19102306962013244, "signal/batch_coverage_25/group_zero_std_frac": 0.00625, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.014360682666301727, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.014360682666301727, "signal/batch_coverage_5/centered_abs_mean": 0.13322059959173202, "signal/batch_coverage_5/group_std_mean": 0.177263942360878, "signal/batch_coverage_5/group_zero_std_frac": 0.00625, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.01332206018269062, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.01332206018269062, "signal/brier_reward/centered_abs_mean": 0.11520067602396011, "signal/brier_reward/group_std_mean": 0.1510821118950844, "signal/brier_reward/group_zero_std_frac": 0.00625, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011520067788660526, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011520067788660526, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.07168898284435272, "signal/confidence_uniqueness_reward/group_std_mean": 0.08953625559806824, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.00625, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007168898358941078, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007168898358941078, "signal/format_reward/centered_abs_mean": 0.000555419921875, "signal/format_reward/group_std_mean": 0.0013209730386734009, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002777099609375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002777099609375, "signal/frontier_aurc_reward/centered_abs_mean": 0.001666014944203198, "signal/frontier_aurc_reward/group_std_mean": 0.002366511942818761, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.082518840325065e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.082518840325065e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.014191032014787197, "signal/frontier_ece_reward/group_std_mean": 0.020698029920458792, "signal/frontier_ece_reward/group_zero_std_frac": 0.0125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001419103262014687, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001419103262014687, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.30279971957206725, "signal/frontier_entropy_batch_reward/group_std_mean": 0.38480743765830994, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.059375, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03027997352182865, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03027997352182865, "step": 230 }, { "calibration/aurc": 0.24314173580957563, "calibration/batch_distribution_entropy": 0.7123511896880926, "calibration/buffer_distribution_entropy": 0.7371898135623439, "calibration/confidence_entropy": 0.22319584805799755, "calibration/coverage@0%": 0.009784735812133072, "calibration/coverage@1%": 0.009784735812133072, "calibration/coverage@10%": 0.25522718933463795, "calibration/coverage@15%": 0.35569349315068494, "calibration/coverage@20%": 0.4861928204500979, "calibration/coverage@25%": 0.5659001956947163, "calibration/coverage@30%": 0.6616400134540117, "calibration/coverage@5%": 0.1485475782778865, "calibration/ece": 0.13654095046647682, "calibration/mean_confidence": 0.5268796281130378, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 445.2, "completions/max_terminated_length": 445.2, "completions/mean_length": 192.54716796875, "completions/mean_terminated_length": 192.56629943847656, "completions/min_length": 74.8, "completions/min_terminated_length": 93.0, "epoch": 0.752, "grad_norm": 0.0022267785388976336, "learning_rate": 1e-06, "loss": -0.0, "num_tokens": 796639806.0, "reward": 1.291816520690918, "reward_std": 0.13338485807180406, "rewards/accuracy_reward": 0.5451171875, "rewards/batch_coverage_0": 0.5534084558486938, "rewards/batch_coverage_1": 0.5534084558486938, "rewards/batch_coverage_10": 0.59267737865448, "rewards/batch_coverage_15": 0.5985195159912109, "rewards/batch_coverage_20": 0.6050418257713318, "rewards/batch_coverage_25": 0.6092750668525696, "rewards/batch_coverage_5": 0.5784332752227783, "rewards/brier_reward": 0.7937209248542786, "rewards/confidence_uniqueness_reward": 0.8617787718772888, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.002670770836994052, "rewards/frontier_ece_reward": 0.012176255136728287, "rewards/frontier_entropy_batch_reward": -0.5650394916534424, "signal/accuracy_reward/centered_abs_mean": 0.07152099609375, "signal/accuracy_reward/group_std_mean": 0.0952995702624321, "signal/accuracy_reward/group_zero_std_frac": 0.725, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.035760498046875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.035760498046875, "signal/advantage_abs_mean": 0.10051141977310181, "signal/advantage_pre_scale_abs_mean": 0.10051141977310181, "signal/advantage_pre_scale_std": 0.17218650579452516, "signal/advantage_std": 0.17218650579452516, "signal/batch_coverage_0/centered_abs_mean": 0.12577387243509291, "signal/batch_coverage_0/group_std_mean": 0.1656516134738922, "signal/batch_coverage_0/group_zero_std_frac": 0.015625, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.0125773873180151, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.0125773873180151, "signal/batch_coverage_1/centered_abs_mean": 0.12577387243509291, "signal/batch_coverage_1/group_std_mean": 0.1656516134738922, "signal/batch_coverage_1/group_zero_std_frac": 0.015625, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.0125773873180151, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.0125773873180151, "signal/batch_coverage_10/centered_abs_mean": 0.13408605754375458, "signal/batch_coverage_10/group_std_mean": 0.17692401707172395, "signal/batch_coverage_10/group_zero_std_frac": 0.015625, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.013408605940639973, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.013408605940639973, "signal/batch_coverage_15/centered_abs_mean": 0.13487772792577743, "signal/batch_coverage_15/group_std_mean": 0.17835747599601745, "signal/batch_coverage_15/group_zero_std_frac": 0.015625, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.013487772829830647, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.013487772829830647, "signal/batch_coverage_20/centered_abs_mean": 0.13585045635700227, "signal/batch_coverage_20/group_std_mean": 0.18036530017852784, "signal/batch_coverage_20/group_zero_std_frac": 0.015625, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.01358504593372345, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.01358504593372345, "signal/batch_coverage_25/centered_abs_mean": 0.13990364819765091, "signal/batch_coverage_25/group_std_mean": 0.18538238406181334, "signal/batch_coverage_25/group_zero_std_frac": 0.015625, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.013990364596247673, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.013990364596247673, "signal/batch_coverage_5/centered_abs_mean": 0.1291911095380783, "signal/batch_coverage_5/group_std_mean": 0.16966727375984192, "signal/batch_coverage_5/group_zero_std_frac": 0.015625, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.012919111363589763, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.012919111363589763, "signal/brier_reward/centered_abs_mean": 0.10877085328102112, "signal/brier_reward/group_std_mean": 0.14029166996479034, "signal/brier_reward/group_zero_std_frac": 0.015625, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01087708566337824, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01087708566337824, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0628635786473751, "signal/confidence_uniqueness_reward/group_std_mean": 0.07769058793783187, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.015625, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006286358088254928, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006286358088254928, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.0015472988365218044, "signal/frontier_aurc_reward/group_std_mean": 0.00224983487278223, "signal/frontier_aurc_reward/group_zero_std_frac": 0.00625, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.934123574756086e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.934123574756086e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.01419222578406334, "signal/frontier_ece_reward/group_std_mean": 0.020413671061396597, "signal/frontier_ece_reward/group_zero_std_frac": 0.025, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0014192226575687529, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0014192226575687529, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3012643814086914, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3788640916347504, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.08125, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.030126439034938814, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.030126439034938814, "step": 235 }, { "calibration/aurc": 0.21815909386420346, "calibration/batch_distribution_entropy": 0.769684210167051, "calibration/buffer_distribution_entropy": 0.7371303368412581, "calibration/confidence_entropy": 0.2846331447906999, "calibration/coverage@0%": 0.053515625, "calibration/coverage@1%": 0.182421875, "calibration/coverage@10%": 0.357421875, "calibration/coverage@15%": 0.451953125, "calibration/coverage@20%": 0.51015625, "calibration/coverage@25%": 0.562109375, "calibration/coverage@30%": 0.643359375, "calibration/coverage@5%": 0.248828125, "calibration/ece": 0.15731911620859837, "calibration/mean_confidence": 0.5699674765575071, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 524.8, "completions/max_terminated_length": 524.8, "completions/mean_length": 203.03603515625, "completions/mean_terminated_length": 203.03603515625, "completions/min_length": 92.2, "completions/min_terminated_length": 92.2, "epoch": 0.768, "grad_norm": 0.001630605780519545, "learning_rate": 1e-06, "loss": 0.0006, "num_tokens": 813651599.0, "reward": 1.2780816316604615, "reward_std": 0.14629258513450621, "rewards/accuracy_reward": 0.5091796875, "rewards/batch_coverage_0": 0.5428201794624329, "rewards/batch_coverage_1": 0.5428201794624329, "rewards/batch_coverage_10": 0.5980752348899842, "rewards/batch_coverage_15": 0.6076067090034485, "rewards/batch_coverage_20": 0.6146794557571411, "rewards/batch_coverage_25": 0.6174161672592163, "rewards/batch_coverage_5": 0.5823442339897156, "rewards/brier_reward": 0.8140376687049866, "rewards/confidence_uniqueness_reward": 0.8872955322265625, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.0023141817888244986, "rewards/frontier_ece_reward": 0.014350495114922524, "rewards/frontier_entropy_batch_reward": -0.5862390518188476, "signal/accuracy_reward/centered_abs_mean": 0.07115478515625, "signal/accuracy_reward/group_std_mean": 0.09603603929281235, "signal/accuracy_reward/group_zero_std_frac": 0.715625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.035577392578125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.035577392578125, "signal/advantage_abs_mean": 0.10806111246347427, "signal/advantage_pre_scale_abs_mean": 0.10806111246347427, "signal/advantage_pre_scale_std": 0.17815430760383605, "signal/advantage_std": 0.17815430760383605, "signal/batch_coverage_0/centered_abs_mean": 0.12889273911714555, "signal/batch_coverage_0/group_std_mean": 0.17496535778045655, "signal/batch_coverage_0/group_zero_std_frac": 0.00625, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.012889273837208748, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.012889273837208748, "signal/batch_coverage_1/centered_abs_mean": 0.12889273911714555, "signal/batch_coverage_1/group_std_mean": 0.17496535778045655, "signal/batch_coverage_1/group_zero_std_frac": 0.00625, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.012889273837208748, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.012889273837208748, "signal/batch_coverage_10/centered_abs_mean": 0.13530712127685546, "signal/batch_coverage_10/group_std_mean": 0.18599896728992463, "signal/batch_coverage_10/group_zero_std_frac": 0.00625, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.013530712574720383, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.013530712574720383, "signal/batch_coverage_15/centered_abs_mean": 0.13818536698818207, "signal/batch_coverage_15/group_std_mean": 0.19022200405597686, "signal/batch_coverage_15/group_zero_std_frac": 0.00625, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.013818537257611752, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.013818537257611752, "signal/batch_coverage_20/centered_abs_mean": 0.14182150661945342, "signal/batch_coverage_20/group_std_mean": 0.1955121785402298, "signal/batch_coverage_20/group_zero_std_frac": 0.00625, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.01418215073645115, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.01418215073645115, "signal/batch_coverage_25/centered_abs_mean": 0.14496648013591767, "signal/batch_coverage_25/group_std_mean": 0.1991838574409485, "signal/batch_coverage_25/group_zero_std_frac": 0.00625, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.01449664793908596, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.01449664793908596, "signal/batch_coverage_5/centered_abs_mean": 0.13354593515396118, "signal/batch_coverage_5/group_std_mean": 0.18284645676612854, "signal/batch_coverage_5/group_zero_std_frac": 0.00625, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.013354593142867088, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.013354593142867088, "signal/brier_reward/centered_abs_mean": 0.11099631637334824, "signal/brier_reward/group_std_mean": 0.1493169844150543, "signal/brier_reward/group_zero_std_frac": 0.00625, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011099631898105145, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011099631898105145, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.05087149143218994, "signal/confidence_uniqueness_reward/group_std_mean": 0.06436762884259224, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.00625, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005087149236351252, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005087149236351252, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0014162956038489937, "signal/frontier_aurc_reward/group_std_mean": 0.0021210991777479648, "signal/frontier_aurc_reward/group_zero_std_frac": 0.00625, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.770369544829009e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.770369544829009e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.015161270461976527, "signal/frontier_ece_reward/group_std_mean": 0.021915557235479353, "signal/frontier_ece_reward/group_zero_std_frac": 0.01875, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00151612707413733, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00151612707413733, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.30829928517341615, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3859909653663635, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.084375, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.030829928815364838, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.030829928815364838, "step": 240 }, { "calibration/aurc": 0.31010626739798186, "calibration/batch_distribution_entropy": 0.6831615592574666, "calibration/buffer_distribution_entropy": 0.7349332924643199, "calibration/confidence_entropy": 0.21844742802956504, "calibration/coverage@0%": 0.054296875, "calibration/coverage@1%": 0.094140625, "calibration/coverage@10%": 0.146484375, "calibration/coverage@15%": 0.255859375, "calibration/coverage@20%": 0.2851807118395303, "calibration/coverage@25%": 0.4258301736790607, "calibration/coverage@30%": 0.5031739236790607, "calibration/coverage@5%": 0.121875, "calibration/ece": 0.19194136252588118, "calibration/mean_confidence": 0.5863911610582415, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 681.2, "completions/max_terminated_length": 681.2, "completions/mean_length": 207.463671875, "completions/mean_terminated_length": 207.5250457763672, "completions/min_length": 59.6, "completions/min_terminated_length": 98.0, "epoch": 0.784, "grad_norm": 0.0015109021915122867, "learning_rate": 1e-06, "loss": 0.0001, "num_tokens": 830950395.0, "reward": 1.268712568283081, "reward_std": 0.13139432221651076, "rewards/accuracy_reward": 0.52626953125, "rewards/batch_coverage_0": 0.5273642539978027, "rewards/batch_coverage_1": 0.5273642539978027, "rewards/batch_coverage_10": 0.5780391335487366, "rewards/batch_coverage_15": 0.5797824621200561, "rewards/batch_coverage_20": 0.5853094935417176, "rewards/batch_coverage_25": 0.5844082474708557, "rewards/batch_coverage_5": 0.5631725430488587, "rewards/brier_reward": 0.7936733603477478, "rewards/confidence_uniqueness_reward": 0.8792999029159546, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.0025360194500535726, "rewards/frontier_ece_reward": 0.011786134168505668, "rewards/frontier_entropy_batch_reward": -0.5726401686668396, "signal/accuracy_reward/centered_abs_mean": 0.086236572265625, "signal/accuracy_reward/group_std_mean": 0.11323271244764328, "signal/accuracy_reward/group_zero_std_frac": 0.678125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0431182861328125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0431182861328125, "signal/advantage_abs_mean": 0.09704598337411881, "signal/advantage_pre_scale_abs_mean": 0.09704598337411881, "signal/advantage_pre_scale_std": 0.16713069379329681, "signal/advantage_std": 0.16713069379329681, "signal/batch_coverage_0/centered_abs_mean": 0.11174871325492859, "signal/batch_coverage_0/group_std_mean": 0.1489221602678299, "signal/batch_coverage_0/group_zero_std_frac": 0.0125, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.01117487158626318, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.01117487158626318, "signal/batch_coverage_1/centered_abs_mean": 0.11174871325492859, "signal/batch_coverage_1/group_std_mean": 0.1489221602678299, "signal/batch_coverage_1/group_zero_std_frac": 0.0125, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.01117487158626318, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.01117487158626318, "signal/batch_coverage_10/centered_abs_mean": 0.12213961333036423, "signal/batch_coverage_10/group_std_mean": 0.16586555540561676, "signal/batch_coverage_10/group_zero_std_frac": 0.0125, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.0122139610350132, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.0122139610350132, "signal/batch_coverage_15/centered_abs_mean": 0.12145914733409882, "signal/batch_coverage_15/group_std_mean": 0.16523004770278932, "signal/batch_coverage_15/group_zero_std_frac": 0.0125, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.012145914323627948, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.012145914323627948, "signal/batch_coverage_20/centered_abs_mean": 0.12562210708856583, "signal/batch_coverage_20/group_std_mean": 0.1706618309020996, "signal/batch_coverage_20/group_zero_std_frac": 0.0125, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.012562210485339165, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.012562210485339165, "signal/batch_coverage_25/centered_abs_mean": 0.12414962351322174, "signal/batch_coverage_25/group_std_mean": 0.16929054260253906, "signal/batch_coverage_25/group_zero_std_frac": 0.0125, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.01241496242582798, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.01241496242582798, "signal/batch_coverage_5/centered_abs_mean": 0.11780698150396347, "signal/batch_coverage_5/group_std_mean": 0.1591268002986908, "signal/batch_coverage_5/group_zero_std_frac": 0.0125, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.011780698224902153, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.011780698224902153, "signal/brier_reward/centered_abs_mean": 0.10932108014822006, "signal/brier_reward/group_std_mean": 0.1419556260108948, "signal/brier_reward/group_zero_std_frac": 0.0125, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010932107828557492, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010932107828557492, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.05147377476096153, "signal/confidence_uniqueness_reward/group_std_mean": 0.06528320237994194, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0125, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00514737768098712, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00514737768098712, "signal/format_reward/centered_abs_mean": 0.000555419921875, "signal/format_reward/group_std_mean": 0.0013209730386734009, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002777099609375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002777099609375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0014083811081945896, "signal/frontier_aurc_reward/group_std_mean": 0.0020011267391964794, "signal/frontier_aurc_reward/group_zero_std_frac": 0.00625, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.760476479830686e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.760476479830686e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.014706828817725181, "signal/frontier_ece_reward/group_std_mean": 0.021075991913676262, "signal/frontier_ece_reward/group_zero_std_frac": 0.028125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0014706829097121954, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0014706829097121954, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2917087584733963, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3725455284118652, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.06875, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02917087487876415, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02917087487876415, "step": 245 }, { "calibration/aurc": 0.19282041934687397, "calibration/batch_distribution_entropy": 0.6920062865146607, "calibration/buffer_distribution_entropy": 0.7301821197316796, "calibration/confidence_entropy": 0.22344387400004026, "calibration/coverage@0%": 0.046966731898238745, "calibration/coverage@1%": 0.05675146771037182, "calibration/coverage@10%": 0.42365612769080235, "calibration/coverage@15%": 0.5261764615949118, "calibration/coverage@20%": 0.5903368089530333, "calibration/coverage@25%": 0.663110781555773, "calibration/coverage@30%": 0.7335280088062623, "calibration/coverage@5%": 0.27499235567514674, "calibration/ece": 0.12089007125129762, "calibration/mean_confidence": 0.5416411376509921, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0005859375, "completions/max_length": 598.2, "completions/max_terminated_length": 598.2, "completions/mean_length": 206.33037109375, "completions/mean_terminated_length": 206.45191040039063, "completions/min_length": 42.2, "completions/min_terminated_length": 104.8, "epoch": 0.8, "grad_norm": 0.0014231241075322032, "learning_rate": 1e-06, "loss": 0.0001, "num_tokens": 848073778.0, "reward": 1.3338397979736327, "reward_std": 0.13077863156795502, "rewards/accuracy_reward": 0.56591796875, "rewards/batch_coverage_0": 0.5898068666458129, "rewards/batch_coverage_1": 0.5898068666458129, "rewards/batch_coverage_10": 0.6379106283187866, "rewards/batch_coverage_15": 0.6420271754264831, "rewards/batch_coverage_20": 0.6438995718955993, "rewards/batch_coverage_25": 0.6488496541976929, "rewards/batch_coverage_5": 0.6152103543281555, "rewards/brier_reward": 0.8356095075607299, "rewards/confidence_uniqueness_reward": 0.8643570899963379, "rewards/format_reward": 0.99931640625, "rewards/frontier_aurc_reward": -0.0020930708618834616, "rewards/frontier_ece_reward": 0.016211163997650147, "rewards/frontier_entropy_batch_reward": -0.5712017893791199, "signal/accuracy_reward/centered_abs_mean": 0.072723388671875, "signal/accuracy_reward/group_std_mean": 0.09923464208841323, "signal/accuracy_reward/group_zero_std_frac": 0.70625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0363616943359375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0363616943359375, "signal/advantage_abs_mean": 0.09668900221586227, "signal/advantage_pre_scale_abs_mean": 0.09668900221586227, "signal/advantage_pre_scale_std": 0.16912133395671844, "signal/advantage_std": 0.16912133395671844, "signal/batch_coverage_0/centered_abs_mean": 0.12063909322023392, "signal/batch_coverage_0/group_std_mean": 0.15900228917598724, "signal/batch_coverage_0/group_zero_std_frac": 0.009375, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.012063909694552422, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.012063909694552422, "signal/batch_coverage_1/centered_abs_mean": 0.12063909322023392, "signal/batch_coverage_1/group_std_mean": 0.15900228917598724, "signal/batch_coverage_1/group_zero_std_frac": 0.009375, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.012063909694552422, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.012063909694552422, "signal/batch_coverage_10/centered_abs_mean": 0.13073422014713287, "signal/batch_coverage_10/group_std_mean": 0.17543690502643586, "signal/batch_coverage_10/group_zero_std_frac": 0.009375, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.013073421642184257, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.013073421642184257, "signal/batch_coverage_15/centered_abs_mean": 0.13373751044273377, "signal/batch_coverage_15/group_std_mean": 0.17892775535583497, "signal/batch_coverage_15/group_zero_std_frac": 0.009375, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.013373749889433384, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.013373749889433384, "signal/batch_coverage_20/centered_abs_mean": 0.1333797663450241, "signal/batch_coverage_20/group_std_mean": 0.17859428822994233, "signal/batch_coverage_20/group_zero_std_frac": 0.009375, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.01333797611296177, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.01333797611296177, "signal/batch_coverage_25/centered_abs_mean": 0.13365401029586793, "signal/batch_coverage_25/group_std_mean": 0.1794481545686722, "signal/batch_coverage_25/group_zero_std_frac": 0.009375, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.013365400955080986, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.013365400955080986, "signal/batch_coverage_5/centered_abs_mean": 0.12666998505592347, "signal/batch_coverage_5/group_std_mean": 0.16839803159236907, "signal/batch_coverage_5/group_zero_std_frac": 0.009375, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.012666998617351054, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.012666998617351054, "signal/brier_reward/centered_abs_mean": 0.10218747854232788, "signal/brier_reward/group_std_mean": 0.13509040623903273, "signal/brier_reward/group_zero_std_frac": 0.009375, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010218747891485691, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010218747891485691, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.05471925586462021, "signal/confidence_uniqueness_reward/group_std_mean": 0.07163973078131676, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.009375, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005471925716847181, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005471925716847181, "signal/format_reward/centered_abs_mean": 0.001324462890625, "signal/format_reward/group_std_mean": 0.0038669900968670845, "signal/format_reward/group_zero_std_frac": 0.978125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006622314453125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0006622314453125, "signal/frontier_aurc_reward/centered_abs_mean": 0.0011783302179537714, "signal/frontier_aurc_reward/group_std_mean": 0.0017042707419022917, "signal/frontier_aurc_reward/group_zero_std_frac": 0.003125, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.4729128452017903e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.4729128452017903e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.014615490287542342, "signal/frontier_ece_reward/group_std_mean": 0.021591220796108247, "signal/frontier_ece_reward/group_zero_std_frac": 0.025, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0014615490566939116, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0014615490566939116, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2841952681541443, "signal/frontier_entropy_batch_reward/group_std_mean": 0.35755252838134766, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.115625, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.028419527411460876, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.028419527411460876, "step": 250 }, { "epoch": 0.8, "eval_calibration/aurc": 0.4775511860244453, "eval_calibration/batch_distribution_entropy": 0.6805845499846008, "eval_calibration/buffer_distribution_entropy": 0.7287791314575365, "eval_calibration/confidence_entropy": 0.2213890362749855, "eval_calibration/coverage@0%": 0.0, "eval_calibration/coverage@1%": 0.0, "eval_calibration/coverage@10%": 0.0859375, "eval_calibration/coverage@15%": 0.125, "eval_calibration/coverage@20%": 0.1328125, "eval_calibration/coverage@25%": 0.2578125, "eval_calibration/coverage@30%": 0.2890625, "eval_calibration/coverage@5%": 0.0, "eval_calibration/ece": 0.2288685063967824, "eval_calibration/mean_confidence": 0.5053683476164722, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 554.75, "eval_completions/max_terminated_length": 554.75, "eval_completions/mean_length": 219.2979507446289, "eval_completions/mean_terminated_length": 219.2979507446289, "eval_completions/min_length": 116.25, "eval_completions/min_terminated_length": 116.25, "eval_loss": 0.0, "eval_num_tokens": 848073778.0, "eval_reward": 0.974822074174881, "eval_reward_std": 0.27409257739782333, "eval_rewards/accuracy_reward": 0.416015625, "eval_rewards/batch_coverage_0": 0.30540529638528824, "eval_rewards/batch_coverage_1": 0.30540529638528824, "eval_rewards/batch_coverage_10": 0.2937457114458084, "eval_rewards/batch_coverage_15": 0.29220280796289444, "eval_rewards/batch_coverage_20": 0.2766593173146248, "eval_rewards/batch_coverage_25": 0.2715606912970543, "eval_rewards/batch_coverage_5": 0.30540529638528824, "eval_rewards/brier_reward": 0.7973639369010925, "eval_rewards/confidence_uniqueness_reward": 0.8115234375, "eval_rewards/format_reward": 1.0, "eval_rewards/frontier_aurc_reward": -0.0032213623635470867, "eval_rewards/frontier_ece_reward": 0.00927359308116138, "eval_rewards/frontier_entropy_batch_reward": -1.0, "eval_runtime": 24.8989, "eval_samples_per_second": 20.081, "eval_signal/accuracy_reward/centered_abs_mean": 0.4693603515625, "eval_signal/accuracy_reward/group_std_mean": 0.4918139800429344, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23468017578125, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23468017578125, "eval_signal/advantage_abs_mean": 0.22791888937354088, "eval_signal/advantage_pre_scale_abs_mean": 0.22791888937354088, "eval_signal/advantage_pre_scale_std": 0.27271074801683426, "eval_signal/advantage_std": 0.27271074801683426, "eval_signal/batch_coverage_0/centered_abs_mean": 0.4482870399951935, "eval_signal/batch_coverage_0/group_std_mean": 0.5090989097952843, "eval_signal/batch_coverage_0/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.04482870548963547, "eval_signal/batch_coverage_0/weight": 0.10000000149011612, "eval_signal/batch_coverage_0/weighted_centered_abs_mean": 0.04482870548963547, "eval_signal/batch_coverage_1/centered_abs_mean": 0.4482870399951935, "eval_signal/batch_coverage_1/group_std_mean": 0.5090989097952843, "eval_signal/batch_coverage_1/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.04482870548963547, "eval_signal/batch_coverage_1/weight": 0.10000000149011612, "eval_signal/batch_coverage_1/weighted_centered_abs_mean": 0.04482870548963547, "eval_signal/batch_coverage_10/centered_abs_mean": 0.42958785593509674, "eval_signal/batch_coverage_10/group_std_mean": 0.4874923899769783, "eval_signal/batch_coverage_10/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.04295878764241934, "eval_signal/batch_coverage_10/weight": 0.10000000149011612, "eval_signal/batch_coverage_10/weighted_centered_abs_mean": 0.04295878764241934, "eval_signal/batch_coverage_15/centered_abs_mean": 0.4125537723302841, "eval_signal/batch_coverage_15/group_std_mean": 0.46426092088222504, "eval_signal/batch_coverage_15/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.04125537909567356, "eval_signal/batch_coverage_15/weight": 0.10000000149011612, "eval_signal/batch_coverage_15/weighted_centered_abs_mean": 0.04125537909567356, "eval_signal/batch_coverage_20/centered_abs_mean": 0.38762741535902023, "eval_signal/batch_coverage_20/group_std_mean": 0.4408438876271248, "eval_signal/batch_coverage_20/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.03876274265348911, "eval_signal/batch_coverage_20/weight": 0.10000000149011612, "eval_signal/batch_coverage_20/weighted_centered_abs_mean": 0.03876274265348911, "eval_signal/batch_coverage_25/centered_abs_mean": 0.3840094059705734, "eval_signal/batch_coverage_25/group_std_mean": 0.4386216476559639, "eval_signal/batch_coverage_25/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.03840094292536378, "eval_signal/batch_coverage_25/weight": 0.10000000149011612, "eval_signal/batch_coverage_25/weighted_centered_abs_mean": 0.03840094292536378, "eval_signal/batch_coverage_5/centered_abs_mean": 0.4482870399951935, "eval_signal/batch_coverage_5/group_std_mean": 0.5090989097952843, "eval_signal/batch_coverage_5/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.04482870548963547, "eval_signal/batch_coverage_5/weight": 0.10000000149011612, "eval_signal/batch_coverage_5/weighted_centered_abs_mean": 0.04482870548963547, "eval_signal/brier_reward/centered_abs_mean": 0.27652557939291, "eval_signal/brier_reward/group_std_mean": 0.3379634767770767, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.027652557473629713, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.027652557473629713, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.11602783203125, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.13252386078238487, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.011602783459238708, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.011602783459238708, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.003930443141143769, "eval_signal/frontier_aurc_reward/group_std_mean": 0.007698433822952211, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.913054181088228e-05, "eval_signal/frontier_aurc_reward/weight": 0.012500000186264515, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.913054181088228e-05, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.025017567910254, "eval_signal/frontier_ece_reward/group_std_mean": 0.03442148957401514, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0025017568841576576, "eval_signal/frontier_ece_reward/weight": 0.10000000149011612, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0025017568841576576, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0, "eval_steps_per_second": 0.161, "step": 250 }, { "calibration/aurc": 0.2267960123496567, "calibration/batch_distribution_entropy": 0.6801923238285612, "calibration/buffer_distribution_entropy": 0.7272822166052157, "calibration/confidence_entropy": 0.20849673218961717, "calibration/coverage@0%": 0.0296875, "calibration/coverage@1%": 0.0296875, "calibration/coverage@10%": 0.2459026418786693, "calibration/coverage@15%": 0.34717312866927597, "calibration/coverage@20%": 0.5267497859589041, "calibration/coverage@25%": 0.6791913833170253, "calibration/coverage@30%": 0.7862921966731898, "calibration/coverage@5%": 0.069140625, "calibration/ece": 0.15203698595724888, "calibration/mean_confidence": 0.5770622252568032, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 576.2, "completions/max_terminated_length": 576.2, "completions/mean_length": 212.0181640625, "completions/mean_terminated_length": 212.0784149169922, "completions/min_length": 60.4, "completions/min_terminated_length": 102.2, "epoch": 0.816, "grad_norm": 0.001950260135345161, "learning_rate": 1e-06, "loss": 0.0006, "num_tokens": 865344012.0, "reward": 1.2896551132202148, "reward_std": 0.1455328106880188, "rewards/accuracy_reward": 0.548828125, "rewards/batch_coverage_0": 0.5595304846763611, "rewards/batch_coverage_1": 0.5595304846763611, "rewards/batch_coverage_10": 0.5896369695663453, "rewards/batch_coverage_15": 0.596402394771576, "rewards/batch_coverage_20": 0.5992687940597534, "rewards/batch_coverage_25": 0.60002201795578, "rewards/batch_coverage_5": 0.5778818249702453, "rewards/brier_reward": 0.7952520966529846, "rewards/confidence_uniqueness_reward": 0.8402364134788514, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.0025426708860322835, "rewards/frontier_ece_reward": 0.0110880795866251, "rewards/frontier_entropy_batch_reward": -0.5746568083763123, "signal/accuracy_reward/centered_abs_mean": 0.0798828125, "signal/accuracy_reward/group_std_mean": 0.10603945106267929, "signal/accuracy_reward/group_zero_std_frac": 0.69375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.03994140625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.03994140625, "signal/advantage_abs_mean": 0.11010724902153016, "signal/advantage_pre_scale_abs_mean": 0.11010724902153016, "signal/advantage_pre_scale_std": 0.18228633999824523, "signal/advantage_std": 0.18228633999824523, "signal/batch_coverage_0/centered_abs_mean": 0.13016874790191652, "signal/batch_coverage_0/group_std_mean": 0.17292714715003968, "signal/batch_coverage_0/group_zero_std_frac": 0.009375, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.01301687490195036, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.01301687490195036, "signal/batch_coverage_1/centered_abs_mean": 0.13016874790191652, "signal/batch_coverage_1/group_std_mean": 0.17292714715003968, "signal/batch_coverage_1/group_zero_std_frac": 0.009375, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.01301687490195036, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.01301687490195036, "signal/batch_coverage_10/centered_abs_mean": 0.1363198786973953, "signal/batch_coverage_10/group_std_mean": 0.1838128536939621, "signal/batch_coverage_10/group_zero_std_frac": 0.009375, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.013631988130509854, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.013631988130509854, "signal/batch_coverage_15/centered_abs_mean": 0.13772199749946595, "signal/batch_coverage_15/group_std_mean": 0.18577627539634706, "signal/batch_coverage_15/group_zero_std_frac": 0.009375, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.013772199861705303, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.013772199861705303, "signal/batch_coverage_20/centered_abs_mean": 0.13926664888858795, "signal/batch_coverage_20/group_std_mean": 0.18798602223396302, "signal/batch_coverage_20/group_zero_std_frac": 0.009375, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.013926665298640728, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.013926665298640728, "signal/batch_coverage_25/centered_abs_mean": 0.13877122104167938, "signal/batch_coverage_25/group_std_mean": 0.18786073625087737, "signal/batch_coverage_25/group_zero_std_frac": 0.009375, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.013877122662961483, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.013877122662961483, "signal/batch_coverage_5/centered_abs_mean": 0.13375141620635986, "signal/batch_coverage_5/group_std_mean": 0.17892039716243743, "signal/batch_coverage_5/group_zero_std_frac": 0.009375, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.01337514165788889, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.01337514165788889, "signal/brier_reward/centered_abs_mean": 0.12692040652036668, "signal/brier_reward/group_std_mean": 0.16331054866313935, "signal/brier_reward/group_zero_std_frac": 0.009375, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012692040763795376, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012692040763795376, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.07178411781787872, "signal/confidence_uniqueness_reward/group_std_mean": 0.08999869525432587, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.009375, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00717841163277626, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00717841163277626, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.0015543691348284482, "signal/frontier_aurc_reward/group_std_mean": 0.002139846235513687, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.942961334862048e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.942961334862048e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.015287755616009235, "signal/frontier_ece_reward/group_std_mean": 0.022031076624989508, "signal/frontier_ece_reward/group_zero_std_frac": 0.025, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0015287755988538266, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0015287755988538266, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.29790628552436826, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3725601613521576, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.13125, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02979062981903553, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02979062981903553, "step": 255 }, { "calibration/aurc": 0.3024189916118689, "calibration/batch_distribution_entropy": 0.6861718769182762, "calibration/buffer_distribution_entropy": 0.7247281231943082, "calibration/confidence_entropy": 0.2141393949059299, "calibration/coverage@0%": 0.042578125, "calibration/coverage@1%": 0.04609375, "calibration/coverage@10%": 0.201171875, "calibration/coverage@15%": 0.2602418664383562, "calibration/coverage@20%": 0.28878042441291585, "calibration/coverage@25%": 0.30363487646771037, "calibration/coverage@30%": 0.5067927470645792, "calibration/coverage@5%": 0.18671875, "calibration/ece": 0.14507543516520943, "calibration/mean_confidence": 0.5084626958457709, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 563.8, "completions/max_terminated_length": 563.8, "completions/mean_length": 214.33603515625, "completions/mean_terminated_length": 214.35703125, "completions/min_length": 81.0, "completions/min_terminated_length": 102.2, "epoch": 0.832, "grad_norm": 0.0017113488866016269, "learning_rate": 1e-06, "loss": 0.0005, "num_tokens": 882547165.0, "reward": 1.3040168046951295, "reward_std": 0.12452108860015869, "rewards/accuracy_reward": 0.54287109375, "rewards/batch_coverage_0": 0.5823396205902099, "rewards/batch_coverage_1": 0.5823396205902099, "rewards/batch_coverage_10": 0.6146724224090576, "rewards/batch_coverage_15": 0.6178886413574218, "rewards/batch_coverage_20": 0.6242651224136353, "rewards/batch_coverage_25": 0.6258123993873597, "rewards/batch_coverage_5": 0.6028478622436524, "rewards/brier_reward": 0.8178551077842713, "rewards/confidence_uniqueness_reward": 0.8379308581352234, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.002056396356783807, "rewards/frontier_ece_reward": 0.012563115172088146, "rewards/frontier_entropy_batch_reward": -0.5914687752723694, "signal/accuracy_reward/centered_abs_mean": 0.076885986328125, "signal/accuracy_reward/group_std_mean": 0.1008117601275444, "signal/accuracy_reward/group_zero_std_frac": 0.715625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0384429931640625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0384429931640625, "signal/advantage_abs_mean": 0.09464630335569382, "signal/advantage_pre_scale_abs_mean": 0.09464630335569382, "signal/advantage_pre_scale_std": 0.16419532597064973, "signal/advantage_std": 0.16419532597064973, "signal/batch_coverage_0/centered_abs_mean": 0.11629042327404022, "signal/batch_coverage_0/group_std_mean": 0.15208458602428437, "signal/batch_coverage_0/group_zero_std_frac": 0.01875, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.011629042960703374, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.011629042960703374, "signal/batch_coverage_1/centered_abs_mean": 0.11629042327404022, "signal/batch_coverage_1/group_std_mean": 0.15208458602428437, "signal/batch_coverage_1/group_zero_std_frac": 0.01875, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.011629042960703374, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.011629042960703374, "signal/batch_coverage_10/centered_abs_mean": 0.12401841282844543, "signal/batch_coverage_10/group_std_mean": 0.1630850315093994, "signal/batch_coverage_10/group_zero_std_frac": 0.01875, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.012401841208338737, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.012401841208338737, "signal/batch_coverage_15/centered_abs_mean": 0.12051795870065689, "signal/batch_coverage_15/group_std_mean": 0.15964794158935547, "signal/batch_coverage_15/group_zero_std_frac": 0.01875, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.012051796354353429, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.012051796354353429, "signal/batch_coverage_20/centered_abs_mean": 0.12344660609960556, "signal/batch_coverage_20/group_std_mean": 0.16371358335018157, "signal/batch_coverage_20/group_zero_std_frac": 0.01875, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.012344660982489587, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.012344660982489587, "signal/batch_coverage_25/centered_abs_mean": 0.12385808080434799, "signal/batch_coverage_25/group_std_mean": 0.16440981924533843, "signal/batch_coverage_25/group_zero_std_frac": 0.01875, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.012385808303952217, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.012385808303952217, "signal/batch_coverage_5/centered_abs_mean": 0.12124198824167251, "signal/batch_coverage_5/group_std_mean": 0.15847708582878112, "signal/batch_coverage_5/group_zero_std_frac": 0.01875, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.012124198861420155, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.012124198861420155, "signal/brier_reward/centered_abs_mean": 0.10539275407791138, "signal/brier_reward/group_std_mean": 0.13495143502950668, "signal/brier_reward/group_zero_std_frac": 0.01875, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010539275407791138, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010539275407791138, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.06620322167873383, "signal/confidence_uniqueness_reward/group_std_mean": 0.08328029215335846, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.01875, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0066203223541378975, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0066203223541378975, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0012305549927987159, "signal/frontier_aurc_reward/group_std_mean": 0.00173785334918648, "signal/frontier_aurc_reward/group_zero_std_frac": 0.00625, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.5381937919300982e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.5381937919300982e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.014193679951131345, "signal/frontier_ece_reward/group_std_mean": 0.02084806077182293, "signal/frontier_ece_reward/group_zero_std_frac": 0.034375, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0014193680603057147, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0014193680603057147, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.28034199476242067, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3522096395492554, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.14375, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02803419977426529, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02803419977426529, "step": 260 }, { "calibration/aurc": 0.29979283230660575, "calibration/batch_distribution_entropy": 0.6765818092872171, "calibration/buffer_distribution_entropy": 0.7230590012667675, "calibration/confidence_entropy": 0.22264761767104865, "calibration/coverage@0%": 0.05626605308219178, "calibration/coverage@1%": 0.06915667808219178, "calibration/coverage@10%": 0.2630664444716243, "calibration/coverage@15%": 0.4146755748532289, "calibration/coverage@20%": 0.4494603106653621, "calibration/coverage@25%": 0.4838528926125244, "calibration/coverage@30%": 0.5705968688845401, "calibration/coverage@5%": 0.13172089041095889, "calibration/ece": 0.17770718042535594, "calibration/mean_confidence": 0.6034304608609038, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 800.0, "completions/max_terminated_length": 800.0, "completions/mean_length": 212.58818359375, "completions/mean_terminated_length": 212.6305938720703, "completions/min_length": 60.6, "completions/min_terminated_length": 98.8, "epoch": 0.848, "grad_norm": 0.0015765472780913115, "learning_rate": 1e-06, "loss": 0.0003, "num_tokens": 899738436.0, "reward": 1.2874624252319335, "reward_std": 0.13262848556041718, "rewards/accuracy_reward": 0.51318359375, "rewards/batch_coverage_0": 0.5744598150253296, "rewards/batch_coverage_1": 0.5744598150253296, "rewards/batch_coverage_10": 0.603915560245514, "rewards/batch_coverage_15": 0.6057839632034302, "rewards/batch_coverage_20": 0.6098087549209594, "rewards/batch_coverage_25": 0.6135491847991943, "rewards/batch_coverage_5": 0.595538854598999, "rewards/brier_reward": 0.806088674068451, "rewards/confidence_uniqueness_reward": 0.8638126015663147, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.002301845629699528, "rewards/frontier_ece_reward": 0.01232540961354971, "rewards/frontier_entropy_batch_reward": -0.5492840528488159, "signal/accuracy_reward/centered_abs_mean": 0.061627197265625, "signal/accuracy_reward/group_std_mean": 0.08691354244947433, "signal/accuracy_reward/group_zero_std_frac": 0.728125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0308135986328125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0308135986328125, "signal/advantage_abs_mean": 0.10021563172340393, "signal/advantage_pre_scale_abs_mean": 0.10021563172340393, "signal/advantage_pre_scale_std": 0.1665943294763565, "signal/advantage_std": 0.1665943294763565, "signal/batch_coverage_0/centered_abs_mean": 0.12552589774131775, "signal/batch_coverage_0/group_std_mean": 0.164810910820961, "signal/batch_coverage_0/group_zero_std_frac": 0.00625, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.012552590481936932, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.012552590481936932, "signal/batch_coverage_1/centered_abs_mean": 0.12552589774131775, "signal/batch_coverage_1/group_std_mean": 0.164810910820961, "signal/batch_coverage_1/group_zero_std_frac": 0.00625, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.012552590481936932, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.012552590481936932, "signal/batch_coverage_10/centered_abs_mean": 0.1308951810002327, "signal/batch_coverage_10/group_std_mean": 0.17249469459056854, "signal/batch_coverage_10/group_zero_std_frac": 0.00625, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.013089518807828427, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.013089518807828427, "signal/batch_coverage_15/centered_abs_mean": 0.13017829209566117, "signal/batch_coverage_15/group_std_mean": 0.1718754142522812, "signal/batch_coverage_15/group_zero_std_frac": 0.00625, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.01301782913506031, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.01301782913506031, "signal/batch_coverage_20/centered_abs_mean": 0.13185496330261232, "signal/batch_coverage_20/group_std_mean": 0.17450777292251587, "signal/batch_coverage_20/group_zero_std_frac": 0.00625, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.013185496255755425, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.013185496255755425, "signal/batch_coverage_25/centered_abs_mean": 0.13360297530889512, "signal/batch_coverage_25/group_std_mean": 0.17730580568313598, "signal/batch_coverage_25/group_zero_std_frac": 0.00625, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.013360296934843063, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.013360296934843063, "signal/batch_coverage_5/centered_abs_mean": 0.12875944674015044, "signal/batch_coverage_5/group_std_mean": 0.16948716938495637, "signal/batch_coverage_5/group_zero_std_frac": 0.00625, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.012875944934785366, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.012875944934785366, "signal/brier_reward/centered_abs_mean": 0.11305683553218841, "signal/brier_reward/group_std_mean": 0.14634765088558196, "signal/brier_reward/group_zero_std_frac": 0.00625, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01130568366497755, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01130568366497755, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.05614908337593079, "signal/confidence_uniqueness_reward/group_std_mean": 0.07217531129717827, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.00625, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005614908412098885, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005614908412098885, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.0013377515482716263, "signal/frontier_aurc_reward/group_std_mean": 0.001973598566837609, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.6721895008231513e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.6721895008231513e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.01556033082306385, "signal/frontier_ece_reward/group_std_mean": 0.021966927498579026, "signal/frontier_ece_reward/group_zero_std_frac": 0.028125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0015560331754386424, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0015560331754386424, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2900454640388489, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3650382697582245, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.09375, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.029004548117518424, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.029004548117518424, "step": 265 }, { "calibration/aurc": 0.2759403497776601, "calibration/batch_distribution_entropy": 0.6759332309201006, "calibration/buffer_distribution_entropy": 0.7218298133224164, "calibration/confidence_entropy": 0.22823251290571217, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.1109375, "calibration/coverage@15%": 0.2078125, "calibration/coverage@20%": 0.36640625, "calibration/coverage@25%": 0.43671875, "calibration/coverage@30%": 0.518359375, "calibration/coverage@5%": 0.09609375, "calibration/ece": 0.21517310261243403, "calibration/mean_confidence": 0.6667003336869002, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.001171875, "completions/max_length": 676.0, "completions/max_terminated_length": 676.0, "completions/mean_length": 215.37490234375, "completions/mean_terminated_length": 215.62887268066407, "completions/min_length": 45.4, "completions/min_terminated_length": 109.8, "epoch": 0.864, "grad_norm": 0.0014087754534557462, "learning_rate": 1e-06, "loss": -0.001, "num_tokens": 916930691.0, "reward": 1.2985878467559815, "reward_std": 0.13927391171455383, "rewards/accuracy_reward": 0.5646484375, "rewards/batch_coverage_0": 0.5505944132804871, "rewards/batch_coverage_1": 0.5505944132804871, "rewards/batch_coverage_10": 0.5925943732261658, "rewards/batch_coverage_15": 0.5966384470462799, "rewards/batch_coverage_20": 0.600320303440094, "rewards/batch_coverage_25": 0.6009154736995697, "rewards/batch_coverage_5": 0.5749504089355468, "rewards/brier_reward": 0.8001022934913635, "rewards/confidence_uniqueness_reward": 0.8556058764457702, "rewards/format_reward": 0.998828125, "rewards/frontier_aurc_reward": -0.0022088738158345224, "rewards/frontier_ece_reward": 0.012191972695291042, "rewards/frontier_entropy_batch_reward": -0.5657361030578614, "signal/accuracy_reward/centered_abs_mean": 0.0737060546875, "signal/accuracy_reward/group_std_mean": 0.09959790781140328, "signal/accuracy_reward/group_zero_std_frac": 0.703125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.03685302734375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.03685302734375, "signal/advantage_abs_mean": 0.10322214066982269, "signal/advantage_pre_scale_abs_mean": 0.10322214066982269, "signal/advantage_pre_scale_std": 0.17601919770240784, "signal/advantage_std": 0.17601919770240784, "signal/batch_coverage_0/centered_abs_mean": 0.12593218684196472, "signal/batch_coverage_0/group_std_mean": 0.16748648285865783, "signal/batch_coverage_0/group_zero_std_frac": 0.01875, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.012593218125402927, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.012593218125402927, "signal/batch_coverage_1/centered_abs_mean": 0.12593218684196472, "signal/batch_coverage_1/group_std_mean": 0.16748648285865783, "signal/batch_coverage_1/group_zero_std_frac": 0.01875, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.012593218125402927, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.012593218125402927, "signal/batch_coverage_10/centered_abs_mean": 0.13293709754943847, "signal/batch_coverage_10/group_std_mean": 0.17932658195495604, "signal/batch_coverage_10/group_zero_std_frac": 0.01875, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.01329371016472578, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.01329371016472578, "signal/batch_coverage_15/centered_abs_mean": 0.13411223590373994, "signal/batch_coverage_15/group_std_mean": 0.1812635064125061, "signal/batch_coverage_15/group_zero_std_frac": 0.01875, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.013411223329603672, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.013411223329603672, "signal/batch_coverage_20/centered_abs_mean": 0.1337016761302948, "signal/batch_coverage_20/group_std_mean": 0.18126676678657533, "signal/batch_coverage_20/group_zero_std_frac": 0.01875, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.013370167277753354, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.013370167277753354, "signal/batch_coverage_25/centered_abs_mean": 0.13443489372730255, "signal/batch_coverage_25/group_std_mean": 0.18231047987937926, "signal/batch_coverage_25/group_zero_std_frac": 0.01875, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.013443489000201226, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.013443489000201226, "signal/batch_coverage_5/centered_abs_mean": 0.1292974293231964, "signal/batch_coverage_5/group_std_mean": 0.17306924760341644, "signal/batch_coverage_5/group_zero_std_frac": 0.01875, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.012929742969572545, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.012929742969572545, "signal/brier_reward/centered_abs_mean": 0.1134807363152504, "signal/brier_reward/group_std_mean": 0.14848156571388244, "signal/brier_reward/group_zero_std_frac": 0.01875, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011348073929548263, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011348073929548263, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.05932857394218445, "signal/confidence_uniqueness_reward/group_std_mean": 0.07796282023191452, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.01875, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005932857375591993, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005932857375591993, "signal/format_reward/centered_abs_mean": 0.00208740234375, "signal/format_reward/group_std_mean": 0.004553806083276868, "signal/format_reward/group_zero_std_frac": 0.978125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.001043701171875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.001043701171875, "signal/frontier_aurc_reward/centered_abs_mean": 0.0012687626876868308, "signal/frontier_aurc_reward/group_std_mean": 0.001827608421444893, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.5859533777984326e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.5859533777984326e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.016462472081184388, "signal/frontier_ece_reward/group_std_mean": 0.024404867365956307, "signal/frontier_ece_reward/group_zero_std_frac": 0.0375, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0016462472500279545, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0016462472500279545, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.285188752412796, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3623782157897949, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.11875, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.028518874570727347, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.028518874570727347, "step": 270 }, { "calibration/aurc": 0.3703553403819811, "calibration/batch_distribution_entropy": 0.7133684850336566, "calibration/buffer_distribution_entropy": 0.7202831683503106, "calibration/confidence_entropy": 0.22813746743108307, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.031702544031311154, "calibration/coverage@20%": 0.031702544031311154, "calibration/coverage@25%": 0.22447330601761256, "calibration/coverage@30%": 0.3096616621819961, "calibration/coverage@5%": 0.0, "calibration/ece": 0.19746284505661899, "calibration/mean_confidence": 0.5590293023641005, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 641.2, "completions/max_terminated_length": 641.2, "completions/mean_length": 215.3197265625, "completions/mean_terminated_length": 215.38296813964843, "completions/min_length": 62.2, "completions/min_terminated_length": 105.6, "epoch": 0.88, "grad_norm": 0.0035550326574593782, "learning_rate": 1e-06, "loss": -0.0013, "num_tokens": 934282637.0, "reward": 1.2669667959213258, "reward_std": 0.13528041839599608, "rewards/accuracy_reward": 0.49970703125, "rewards/batch_coverage_0": 0.5631029605865479, "rewards/batch_coverage_1": 0.5631029605865479, "rewards/batch_coverage_10": 0.5879178166389465, "rewards/batch_coverage_15": 0.5897319793701172, "rewards/batch_coverage_20": 0.5955337166786194, "rewards/batch_coverage_25": 0.6003549456596374, "rewards/batch_coverage_5": 0.5774053573608399, "rewards/brier_reward": 0.8021393895149231, "rewards/confidence_uniqueness_reward": 0.8372437953948975, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.0027949722949415444, "rewards/frontier_ece_reward": 0.012080707773566245, "rewards/frontier_entropy_batch_reward": -0.5556662797927856, "signal/accuracy_reward/centered_abs_mean": 0.076898193359375, "signal/accuracy_reward/group_std_mean": 0.0979268804192543, "signal/accuracy_reward/group_zero_std_frac": 0.734375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0384490966796875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0384490966796875, "signal/advantage_abs_mean": 0.10434950143098831, "signal/advantage_pre_scale_abs_mean": 0.10434950143098831, "signal/advantage_pre_scale_std": 0.171790811419487, "signal/advantage_std": 0.171790811419487, "signal/batch_coverage_0/centered_abs_mean": 0.12332225143909455, "signal/batch_coverage_0/group_std_mean": 0.1597755014896393, "signal/batch_coverage_0/group_zero_std_frac": 0.025, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.01233222484588623, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.01233222484588623, "signal/batch_coverage_1/centered_abs_mean": 0.12332225143909455, "signal/batch_coverage_1/group_std_mean": 0.1597755014896393, "signal/batch_coverage_1/group_zero_std_frac": 0.025, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.01233222484588623, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.01233222484588623, "signal/batch_coverage_10/centered_abs_mean": 0.12346419841051101, "signal/batch_coverage_10/group_std_mean": 0.16303113102912903, "signal/batch_coverage_10/group_zero_std_frac": 0.025, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.01234642006456852, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.01234642006456852, "signal/batch_coverage_15/centered_abs_mean": 0.12326491475105286, "signal/batch_coverage_15/group_std_mean": 0.16280312538146974, "signal/batch_coverage_15/group_zero_std_frac": 0.025, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.012326491996645927, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.012326491996645927, "signal/batch_coverage_20/centered_abs_mean": 0.12646077275276185, "signal/batch_coverage_20/group_std_mean": 0.1670425981283188, "signal/batch_coverage_20/group_zero_std_frac": 0.025, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.012646077573299408, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.012646077573299408, "signal/batch_coverage_25/centered_abs_mean": 0.13245510756969453, "signal/batch_coverage_25/group_std_mean": 0.17370954751968384, "signal/batch_coverage_25/group_zero_std_frac": 0.025, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.013245511427521705, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.013245511427521705, "signal/batch_coverage_5/centered_abs_mean": 0.12559498399496077, "signal/batch_coverage_5/group_std_mean": 0.16357622146606446, "signal/batch_coverage_5/group_zero_std_frac": 0.025, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.0125594986602664, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.0125594986602664, "signal/brier_reward/centered_abs_mean": 0.11624975800514221, "signal/brier_reward/group_std_mean": 0.14766744375228882, "signal/brier_reward/group_zero_std_frac": 0.025, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011624976061284542, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011624976061284542, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.06369166523218155, "signal/confidence_uniqueness_reward/group_std_mean": 0.08169415593147278, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.025, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0063691666349768635, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0063691666349768635, "signal/format_reward/centered_abs_mean": 0.000555419921875, "signal/format_reward/group_std_mean": 0.0013209730386734009, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002777099609375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002777099609375, "signal/frontier_aurc_reward/centered_abs_mean": 0.001598638528957963, "signal/frontier_aurc_reward/group_std_mean": 0.002216655877418816, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.998298175749369e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.998298175749369e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.014671192318201066, "signal/frontier_ece_reward/group_std_mean": 0.019565947353839874, "signal/frontier_ece_reward/group_zero_std_frac": 0.05, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0014671192737296223, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0014671192737296223, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2858834505081177, "signal/frontier_entropy_batch_reward/group_std_mean": 0.36205363273620605, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.10625, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.028588346764445306, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.028588346764445306, "step": 275 }, { "calibration/aurc": 0.3487537494892914, "calibration/batch_distribution_entropy": 0.7252037298917273, "calibration/buffer_distribution_entropy": 0.7198900225087584, "calibration/confidence_entropy": 0.2345359152758087, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.05518590998043053, "calibration/coverage@15%": 0.16850461717221135, "calibration/coverage@20%": 0.24861561276908023, "calibration/coverage@25%": 0.38031127690802347, "calibration/coverage@30%": 0.47884968199608613, "calibration/coverage@5%": 0.0, "calibration/ece": 0.20244766605736125, "calibration/mean_confidence": 0.575858111159339, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 777.0, "completions/max_terminated_length": 777.0, "completions/mean_length": 219.4595703125, "completions/mean_terminated_length": 219.52434997558595, "completions/min_length": 40.4, "completions/min_terminated_length": 101.8, "epoch": 0.896, "grad_norm": 0.0016001098556444049, "learning_rate": 1e-06, "loss": 0.0002, "num_tokens": 951640751.0, "reward": 1.2776224613189697, "reward_std": 0.12914247065782547, "rewards/accuracy_reward": 0.5078125, "rewards/batch_coverage_0": 0.5558313012123108, "rewards/batch_coverage_1": 0.5558313012123108, "rewards/batch_coverage_10": 0.6021793842315674, "rewards/batch_coverage_15": 0.6080036878585815, "rewards/batch_coverage_20": 0.6127823710441589, "rewards/batch_coverage_25": 0.6156375765800476, "rewards/batch_coverage_5": 0.5886821031570435, "rewards/brier_reward": 0.8050480842590332, "rewards/confidence_uniqueness_reward": 0.8560371041297913, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.0025883075315505264, "rewards/frontier_ece_reward": 0.012114399252459407, "rewards/frontier_entropy_batch_reward": -0.5731969356536866, "signal/accuracy_reward/centered_abs_mean": 0.061328125, "signal/accuracy_reward/group_std_mean": 0.08608203381299973, "signal/accuracy_reward/group_zero_std_frac": 0.7375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0306640625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0306640625, "signal/advantage_abs_mean": 0.0942740187048912, "signal/advantage_pre_scale_abs_mean": 0.0942740187048912, "signal/advantage_pre_scale_std": 0.16601733565330506, "signal/advantage_std": 0.16601733565330506, "signal/batch_coverage_0/centered_abs_mean": 0.11676376014947891, "signal/batch_coverage_0/group_std_mean": 0.15656563341617585, "signal/batch_coverage_0/group_zero_std_frac": 0.021875, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.011676376312971115, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.011676376312971115, "signal/batch_coverage_1/centered_abs_mean": 0.11676376014947891, "signal/batch_coverage_1/group_std_mean": 0.15656563341617585, "signal/batch_coverage_1/group_zero_std_frac": 0.021875, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.011676376312971115, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.011676376312971115, "signal/batch_coverage_10/centered_abs_mean": 0.12755317836999894, "signal/batch_coverage_10/group_std_mean": 0.17383061945438386, "signal/batch_coverage_10/group_zero_std_frac": 0.021875, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.012755317986011505, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.012755317986011505, "signal/batch_coverage_15/centered_abs_mean": 0.12786179631948472, "signal/batch_coverage_15/group_std_mean": 0.17495982646942138, "signal/batch_coverage_15/group_zero_std_frac": 0.021875, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.012786179967224597, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.012786179967224597, "signal/batch_coverage_20/centered_abs_mean": 0.1294369652867317, "signal/batch_coverage_20/group_std_mean": 0.1769184023141861, "signal/batch_coverage_20/group_zero_std_frac": 0.021875, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.012943696603178978, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.012943696603178978, "signal/batch_coverage_25/centered_abs_mean": 0.1298828110098839, "signal/batch_coverage_25/group_std_mean": 0.17788674533367158, "signal/batch_coverage_25/group_zero_std_frac": 0.021875, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.01298828087747097, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.01298828087747097, "signal/batch_coverage_5/centered_abs_mean": 0.12295575588941574, "signal/batch_coverage_5/group_std_mean": 0.16745466887950897, "signal/batch_coverage_5/group_zero_std_frac": 0.021875, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.012295575626194478, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.012295575626194478, "signal/brier_reward/centered_abs_mean": 0.10006321221590042, "signal/brier_reward/group_std_mean": 0.134315325319767, "signal/brier_reward/group_zero_std_frac": 0.021875, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010006321221590042, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010006321221590042, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.05508278608322144, "signal/confidence_uniqueness_reward/group_std_mean": 0.0718459963798523, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.021875, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00550827868282795, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00550827868282795, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.001336067053489387, "signal/frontier_aurc_reward/group_std_mean": 0.0019159423885867, "signal/frontier_aurc_reward/group_zero_std_frac": 0.003125, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.670083911449183e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.670083911449183e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.01357954703271389, "signal/frontier_ece_reward/group_std_mean": 0.018551066890358923, "signal/frontier_ece_reward/group_zero_std_frac": 0.034375, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0013579546939581633, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0013579546939581633, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.28496257662773133, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3574348032474518, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.13125, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.028496256843209267, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.028496256843209267, "step": 280 }, { "calibration/aurc": 0.3279010208833354, "calibration/batch_distribution_entropy": 0.7316795531083246, "calibration/buffer_distribution_entropy": 0.7213093588529274, "calibration/confidence_entropy": 0.2522811933581405, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.12012867647058822, "calibration/coverage@15%": 0.18855085784313724, "calibration/coverage@20%": 0.3635968137254902, "calibration/coverage@25%": 0.4284742647058824, "calibration/coverage@30%": 0.4957077205882353, "calibration/coverage@5%": 0.05514093137254902, "calibration/ece": 0.18393924576238366, "calibration/mean_confidence": 0.5282364177652383, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00048828125, "completions/max_length": 901.4, "completions/max_terminated_length": 901.4, "completions/mean_length": 219.38662109375, "completions/mean_terminated_length": 219.49356689453126, "completions/min_length": 39.8, "completions/min_terminated_length": 103.0, "epoch": 0.912, "grad_norm": 0.002637675730511546, "learning_rate": 1e-06, "loss": -0.0009, "num_tokens": 968938566.0, "reward": 1.3016446590423585, "reward_std": 0.13964517116546632, "rewards/accuracy_reward": 0.5302734375, "rewards/batch_coverage_0": 0.5741848707199096, "rewards/batch_coverage_1": 0.5741848707199096, "rewards/batch_coverage_10": 0.617852532863617, "rewards/batch_coverage_15": 0.6220511078834534, "rewards/batch_coverage_20": 0.6254517316818238, "rewards/batch_coverage_25": 0.6275116086006165, "rewards/batch_coverage_5": 0.6016942858695984, "rewards/brier_reward": 0.8188377737998962, "rewards/confidence_uniqueness_reward": 0.8719737768173218, "rewards/format_reward": 0.9994140625, "rewards/frontier_aurc_reward": -0.001976804086007178, "rewards/frontier_ece_reward": 0.012473126128315925, "rewards/frontier_entropy_batch_reward": -0.5779598355293274, "signal/accuracy_reward/centered_abs_mean": 0.064306640625, "signal/accuracy_reward/group_std_mean": 0.0910419762134552, "signal/accuracy_reward/group_zero_std_frac": 0.715625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0321533203125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0321533203125, "signal/advantage_abs_mean": 0.10506602376699448, "signal/advantage_pre_scale_abs_mean": 0.10506602376699448, "signal/advantage_pre_scale_std": 0.17485912442207335, "signal/advantage_std": 0.17485912442207335, "signal/batch_coverage_0/centered_abs_mean": 0.131300450861454, "signal/batch_coverage_0/group_std_mean": 0.1741587519645691, "signal/batch_coverage_0/group_zero_std_frac": 0.0125, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.013130045123398304, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.013130045123398304, "signal/batch_coverage_1/centered_abs_mean": 0.131300450861454, "signal/batch_coverage_1/group_std_mean": 0.1741587519645691, "signal/batch_coverage_1/group_zero_std_frac": 0.0125, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.013130045123398304, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.013130045123398304, "signal/batch_coverage_10/centered_abs_mean": 0.14013472199440002, "signal/batch_coverage_10/group_std_mean": 0.18686334192752838, "signal/batch_coverage_10/group_zero_std_frac": 0.0125, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.014013472571969032, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.014013472571969032, "signal/batch_coverage_15/centered_abs_mean": 0.14249201416969298, "signal/batch_coverage_15/group_std_mean": 0.19025346934795379, "signal/batch_coverage_15/group_zero_std_frac": 0.0125, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.014249200746417046, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.014249200746417046, "signal/batch_coverage_20/centered_abs_mean": 0.141669425368309, "signal/batch_coverage_20/group_std_mean": 0.19006492495536803, "signal/batch_coverage_20/group_zero_std_frac": 0.0125, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.014166942983865737, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.014166942983865737, "signal/batch_coverage_25/centered_abs_mean": 0.14289966523647307, "signal/batch_coverage_25/group_std_mean": 0.19121894836425782, "signal/batch_coverage_25/group_zero_std_frac": 0.0125, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.01428996678441763, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.01428996678441763, "signal/batch_coverage_5/centered_abs_mean": 0.13788548856973648, "signal/batch_coverage_5/group_std_mean": 0.18307800889015197, "signal/batch_coverage_5/group_zero_std_frac": 0.0125, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.013788548670709133, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.013788548670709133, "signal/brier_reward/centered_abs_mean": 0.11448466181755065, "signal/brier_reward/group_std_mean": 0.1494537800550461, "signal/brier_reward/group_zero_std_frac": 0.0125, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011448466219007969, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011448466219007969, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04987005516886711, "signal/confidence_uniqueness_reward/group_std_mean": 0.065058371424675, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0125, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0049870054237544535, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0049870054237544535, "signal/format_reward/centered_abs_mean": 0.001123046875, "signal/format_reward/group_std_mean": 0.0029782545287162067, "signal/format_reward/group_zero_std_frac": 0.984375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0005615234375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0005615234375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0010798663133755326, "signal/frontier_aurc_reward/group_std_mean": 0.0016447607893496751, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.3498328917194158e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.3498328917194158e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.014520647190511226, "signal/frontier_ece_reward/group_std_mean": 0.01967911943793297, "signal/frontier_ece_reward/group_zero_std_frac": 0.03125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0014520647935569286, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0014520647935569286, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2984996199607849, "signal/frontier_entropy_batch_reward/group_std_mean": 0.36841660737991333, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.121875, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.029849962890148164, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.029849962890148164, "step": 285 }, { "calibration/aurc": 0.34628107455417145, "calibration/batch_distribution_entropy": 0.7617031711910593, "calibration/buffer_distribution_entropy": 0.7273704508223597, "calibration/confidence_entropy": 0.2782211951350248, "calibration/coverage@0%": 0.003515625, "calibration/coverage@1%": 0.003515625, "calibration/coverage@10%": 0.09453125, "calibration/coverage@15%": 0.124609375, "calibration/coverage@20%": 0.220703125, "calibration/coverage@25%": 0.296875, "calibration/coverage@30%": 0.407421875, "calibration/coverage@5%": 0.0484375, "calibration/ece": 0.1588166385402925, "calibration/mean_confidence": 0.5132860872825648, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 522.6, "completions/max_terminated_length": 522.6, "completions/mean_length": 214.753515625, "completions/mean_terminated_length": 214.81558532714843, "completions/min_length": 61.0, "completions/min_terminated_length": 102.0, "epoch": 0.928, "grad_norm": 0.0018001672578975558, "learning_rate": 1e-06, "loss": -0.0009, "num_tokens": 986164458.0, "reward": 1.2805254459381104, "reward_std": 0.13228480517864227, "rewards/accuracy_reward": 0.52255859375, "rewards/batch_coverage_0": 0.5598244071006775, "rewards/batch_coverage_1": 0.5598244071006775, "rewards/batch_coverage_10": 0.5985792994499206, "rewards/batch_coverage_15": 0.5995970368385315, "rewards/batch_coverage_20": 0.6019830465316772, "rewards/batch_coverage_25": 0.6030240416526794, "rewards/batch_coverage_5": 0.5841894626617432, "rewards/brier_reward": 0.798598051071167, "rewards/confidence_uniqueness_reward": 0.878295361995697, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.0020402401685714723, "rewards/frontier_ece_reward": 0.010918319411575794, "rewards/frontier_entropy_batch_reward": -0.6006525158882141, "signal/accuracy_reward/centered_abs_mean": 0.064691162109375, "signal/accuracy_reward/group_std_mean": 0.08760243952274323, "signal/accuracy_reward/group_zero_std_frac": 0.7375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0323455810546875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0323455810546875, "signal/advantage_abs_mean": 0.09802084267139435, "signal/advantage_pre_scale_abs_mean": 0.09802084267139435, "signal/advantage_pre_scale_std": 0.16889201402664183, "signal/advantage_std": 0.16889201402664183, "signal/batch_coverage_0/centered_abs_mean": 0.12150347679853439, "signal/batch_coverage_0/group_std_mean": 0.16013022959232331, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.012150347977876664, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.012150347977876664, "signal/batch_coverage_1/centered_abs_mean": 0.12150347679853439, "signal/batch_coverage_1/group_std_mean": 0.16013022959232331, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.012150347977876664, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.012150347977876664, "signal/batch_coverage_10/centered_abs_mean": 0.12959835082292556, "signal/batch_coverage_10/group_std_mean": 0.17412001490592957, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.01295983549207449, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.01295983549207449, "signal/batch_coverage_15/centered_abs_mean": 0.12758624851703643, "signal/batch_coverage_15/group_std_mean": 0.171616131067276, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.012758625112473965, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.012758625112473965, "signal/batch_coverage_20/centered_abs_mean": 0.12791385054588317, "signal/batch_coverage_20/group_std_mean": 0.17244038581848145, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.012791384942829609, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.012791384942829609, "signal/batch_coverage_25/centered_abs_mean": 0.12513090670108795, "signal/batch_coverage_25/group_std_mean": 0.16936978101730346, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.01251309122890234, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.01251309122890234, "signal/batch_coverage_5/centered_abs_mean": 0.1272565320134163, "signal/batch_coverage_5/group_std_mean": 0.1697870075702667, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.012725653126835823, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.012725653126835823, "signal/brier_reward/centered_abs_mean": 0.10803148299455642, "signal/brier_reward/group_std_mean": 0.14384068846702575, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010803148709237575, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010803148709237575, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04943007156252861, "signal/confidence_uniqueness_reward/group_std_mean": 0.06312261894345284, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004943007230758667, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004943007230758667, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.0010793962515890597, "signal/frontier_aurc_reward/group_std_mean": 0.0016425697831436992, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.3492453581420704e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.3492453581420704e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.014414687268435955, "signal/frontier_ece_reward/group_std_mean": 0.019503265619277954, "signal/frontier_ece_reward/group_zero_std_frac": 0.009375, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0014414687175303698, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0014414687175303698, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2935112476348877, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3679080069065094, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.1, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.029351125285029412, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.029351125285029412, "step": 290 }, { "calibration/aurc": 0.24575393115665692, "calibration/batch_distribution_entropy": 0.738798308780878, "calibration/buffer_distribution_entropy": 0.7321804334521286, "calibration/confidence_entropy": 0.26702329875816927, "calibration/coverage@0%": 0.067578125, "calibration/coverage@1%": 0.074609375, "calibration/coverage@10%": 0.281640625, "calibration/coverage@15%": 0.35234375, "calibration/coverage@20%": 0.448828125, "calibration/coverage@25%": 0.537109375, "calibration/coverage@30%": 0.61875, "calibration/coverage@5%": 0.2203125, "calibration/ece": 0.14126106139507846, "calibration/mean_confidence": 0.4586294707677744, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 518.8, "completions/max_terminated_length": 518.8, "completions/mean_length": 220.90615234375, "completions/mean_terminated_length": 220.90615234375, "completions/min_length": 107.6, "completions/min_terminated_length": 107.6, "epoch": 0.944, "grad_norm": 0.0014223521575331688, "learning_rate": 1e-06, "loss": 0.0013, "num_tokens": 1003401961.0, "reward": 1.2642857074737548, "reward_std": 0.1236835777759552, "rewards/accuracy_reward": 0.5099609375, "rewards/batch_coverage_0": 0.5456533968448639, "rewards/batch_coverage_1": 0.5456533968448639, "rewards/batch_coverage_10": 0.5796523928642273, "rewards/batch_coverage_15": 0.5821020603179932, "rewards/batch_coverage_20": 0.5851274251937866, "rewards/batch_coverage_25": 0.5893722891807556, "rewards/batch_coverage_5": 0.5687803268432617, "rewards/brier_reward": 0.8048572063446044, "rewards/confidence_uniqueness_reward": 0.8740989685058593, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.0016524946317076684, "rewards/frontier_ece_reward": 0.01148709300905466, "rewards/frontier_entropy_batch_reward": -0.5935260772705078, "signal/accuracy_reward/centered_abs_mean": 0.07744140625, "signal/accuracy_reward/group_std_mean": 0.10488596558570862, "signal/accuracy_reward/group_zero_std_frac": 0.690625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.038720703125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.038720703125, "signal/advantage_abs_mean": 0.09334558099508286, "signal/advantage_pre_scale_abs_mean": 0.09334558099508286, "signal/advantage_pre_scale_std": 0.15849049091339112, "signal/advantage_std": 0.15849049091339112, "signal/batch_coverage_0/centered_abs_mean": 0.11165303438901901, "signal/batch_coverage_0/group_std_mean": 0.14861661791801453, "signal/batch_coverage_0/group_zero_std_frac": 0.009375, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.011165303364396095, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.011165303364396095, "signal/batch_coverage_1/centered_abs_mean": 0.11165303438901901, "signal/batch_coverage_1/group_std_mean": 0.14861661791801453, "signal/batch_coverage_1/group_zero_std_frac": 0.009375, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.011165303364396095, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.011165303364396095, "signal/batch_coverage_10/centered_abs_mean": 0.121368607878685, "signal/batch_coverage_10/group_std_mean": 0.16234832406044006, "signal/batch_coverage_10/group_zero_std_frac": 0.009375, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.012136861123144627, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.012136861123144627, "signal/batch_coverage_15/centered_abs_mean": 0.12064623832702637, "signal/batch_coverage_15/group_std_mean": 0.16168377697467803, "signal/batch_coverage_15/group_zero_std_frac": 0.009375, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.012064623646438122, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.012064623646438122, "signal/batch_coverage_20/centered_abs_mean": 0.11667853146791458, "signal/batch_coverage_20/group_std_mean": 0.1575889140367508, "signal/batch_coverage_20/group_zero_std_frac": 0.009375, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.011667853407561778, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.011667853407561778, "signal/batch_coverage_25/centered_abs_mean": 0.12033923119306564, "signal/batch_coverage_25/group_std_mean": 0.16211841106414795, "signal/batch_coverage_25/group_zero_std_frac": 0.009375, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.012033923342823983, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.012033923342823983, "signal/batch_coverage_5/centered_abs_mean": 0.11773080676794052, "signal/batch_coverage_5/group_std_mean": 0.15715883374214173, "signal/batch_coverage_5/group_zero_std_frac": 0.009375, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.011773080937564373, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.011773080937564373, "signal/brier_reward/centered_abs_mean": 0.10707180798053742, "signal/brier_reward/group_std_mean": 0.13771842420101166, "signal/brier_reward/group_zero_std_frac": 0.009375, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010707181133329868, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010707181133329868, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.052157020568847655, "signal/confidence_uniqueness_reward/group_std_mean": 0.06535054147243499, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.009375, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005215702205896377, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005215702205896377, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0006664380664005876, "signal/frontier_aurc_reward/group_std_mean": 0.000960760226007551, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 8.330475884577026e-06, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 8.330475884577026e-06, "signal/frontier_ece_reward/centered_abs_mean": 0.014187652990221978, "signal/frontier_ece_reward/group_std_mean": 0.018812255188822746, "signal/frontier_ece_reward/group_zero_std_frac": 0.01875, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0014187653549015522, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0014187653549015522, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2870850205421448, "signal/frontier_entropy_batch_reward/group_std_mean": 0.36086881160736084, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.1125, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.028708503022789954, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.028708503022789954, "step": 295 }, { "calibration/aurc": 0.29138843660168645, "calibration/batch_distribution_entropy": 0.6600876061573718, "calibration/buffer_distribution_entropy": 0.734922833534972, "calibration/confidence_entropy": 0.21770267843701943, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.080078125, "calibration/coverage@15%": 0.323828125, "calibration/coverage@20%": 0.360546875, "calibration/coverage@25%": 0.498828125, "calibration/coverage@30%": 0.5578125, "calibration/coverage@5%": 0.0, "calibration/ece": 0.18084708634125093, "calibration/mean_confidence": 0.5571085219541783, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0005859375, "completions/max_length": 558.8, "completions/max_terminated_length": 558.8, "completions/mean_length": 227.0599609375, "completions/mean_terminated_length": 227.1940704345703, "completions/min_length": 21.0, "completions/min_terminated_length": 104.6, "epoch": 0.96, "grad_norm": 0.0019881308544427156, "learning_rate": 1e-06, "loss": -0.0, "num_tokens": 1020667375.0, "reward": 1.2900043249130249, "reward_std": 0.12938351929187775, "rewards/accuracy_reward": 0.51416015625, "rewards/batch_coverage_0": 0.5834320783615112, "rewards/batch_coverage_1": 0.5834320783615112, "rewards/batch_coverage_10": 0.6152560353279114, "rewards/batch_coverage_15": 0.6167209267616272, "rewards/batch_coverage_20": 0.6183749675750733, "rewards/batch_coverage_25": 0.6207085251808167, "rewards/batch_coverage_5": 0.5995626091957093, "rewards/brier_reward": 0.8106549620628357, "rewards/confidence_uniqueness_reward": 0.8551827192306518, "rewards/format_reward": 0.99931640625, "rewards/frontier_aurc_reward": -0.002351419860497117, "rewards/frontier_ece_reward": 0.012325448356568814, "rewards/frontier_entropy_batch_reward": -0.5826962471008301, "signal/accuracy_reward/centered_abs_mean": 0.068255615234375, "signal/accuracy_reward/group_std_mean": 0.09154722243547439, "signal/accuracy_reward/group_zero_std_frac": 0.73125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0341278076171875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0341278076171875, "signal/advantage_abs_mean": 0.09666871875524521, "signal/advantage_pre_scale_abs_mean": 0.09666871875524521, "signal/advantage_pre_scale_std": 0.16768636405467988, "signal/advantage_std": 0.16768636405467988, "signal/batch_coverage_0/centered_abs_mean": 0.12134916931390763, "signal/batch_coverage_0/group_std_mean": 0.1602302074432373, "signal/batch_coverage_0/group_zero_std_frac": 0.003125, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.012134917080402374, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.012134917080402374, "signal/batch_coverage_1/centered_abs_mean": 0.12134916931390763, "signal/batch_coverage_1/group_std_mean": 0.1602302074432373, "signal/batch_coverage_1/group_zero_std_frac": 0.003125, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.012134917080402374, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.012134917080402374, "signal/batch_coverage_10/centered_abs_mean": 0.1270793542265892, "signal/batch_coverage_10/group_std_mean": 0.16881133019924163, "signal/batch_coverage_10/group_zero_std_frac": 0.003125, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.012707936018705368, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.012707936018705368, "signal/batch_coverage_15/centered_abs_mean": 0.12548649162054062, "signal/batch_coverage_15/group_std_mean": 0.16757002770900725, "signal/batch_coverage_15/group_zero_std_frac": 0.003125, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.01254864949733019, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.01254864949733019, "signal/batch_coverage_20/centered_abs_mean": 0.1269705682992935, "signal/batch_coverage_20/group_std_mean": 0.1693786710500717, "signal/batch_coverage_20/group_zero_std_frac": 0.003125, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.012697057239711285, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.012697057239711285, "signal/batch_coverage_25/centered_abs_mean": 0.1278000921010971, "signal/batch_coverage_25/group_std_mean": 0.17063870429992675, "signal/batch_coverage_25/group_zero_std_frac": 0.003125, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.012780009768903255, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.012780009768903255, "signal/batch_coverage_5/centered_abs_mean": 0.12397996038198471, "signal/batch_coverage_5/group_std_mean": 0.1644669473171234, "signal/batch_coverage_5/group_zero_std_frac": 0.003125, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.012397996336221694, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.012397996336221694, "signal/brier_reward/centered_abs_mean": 0.10672413557767868, "signal/brier_reward/group_std_mean": 0.13898140490055083, "signal/brier_reward/group_zero_std_frac": 0.003125, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010672413557767869, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010672413557767869, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0589519664645195, "signal/confidence_uniqueness_reward/group_std_mean": 0.07484492361545562, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.003125, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005895196599885822, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005895196599885822, "signal/format_reward/centered_abs_mean": 0.001324462890625, "signal/format_reward/group_std_mean": 0.0038669900968670845, "signal/format_reward/group_zero_std_frac": 0.978125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006622314453125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0006622314453125, "signal/frontier_aurc_reward/centered_abs_mean": 0.0011933622765354813, "signal/frontier_aurc_reward/group_std_mean": 0.0016293063759803771, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.4917028420313726e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.4917028420313726e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.013726240582764148, "signal/frontier_ece_reward/group_std_mean": 0.018288495391607283, "signal/frontier_ece_reward/group_zero_std_frac": 0.028125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0013726240722462535, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0013726240722462535, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27692450284957887, "signal/frontier_entropy_batch_reward/group_std_mean": 0.35284039974212644, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.109375, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.027692450582981108, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027692450582981108, "step": 300 }, { "epoch": 0.96, "eval_calibration/aurc": 0.43738341336907305, "eval_calibration/batch_distribution_entropy": 0.6284206643934745, "eval_calibration/buffer_distribution_entropy": 0.7325220520469002, "eval_calibration/confidence_entropy": 0.20829797554021534, "eval_calibration/coverage@0%": 0.0, "eval_calibration/coverage@1%": 0.0, "eval_calibration/coverage@10%": 0.0, "eval_calibration/coverage@15%": 0.0, "eval_calibration/coverage@20%": 0.3125, "eval_calibration/coverage@25%": 0.328125, "eval_calibration/coverage@30%": 0.34375, "eval_calibration/coverage@5%": 0.0, "eval_calibration/ece": 0.20566188520005838, "eval_calibration/mean_confidence": 0.5225051785518035, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 439.5, "eval_completions/max_terminated_length": 439.5, "eval_completions/mean_length": 233.93871307373047, "eval_completions/mean_terminated_length": 233.93871307373047, "eval_completions/min_length": 117.5, "eval_completions/min_terminated_length": 117.5, "eval_loss": 0.0, "eval_num_tokens": 1020667375.0, "eval_reward": 0.9656747579574585, "eval_reward_std": 0.26930802315473557, "eval_rewards/accuracy_reward": 0.427734375, "eval_rewards/batch_coverage_0": 0.28614822030067444, "eval_rewards/batch_coverage_1": 0.28614822030067444, "eval_rewards/batch_coverage_10": 0.28418153524398804, "eval_rewards/batch_coverage_15": 0.27920960262417793, "eval_rewards/batch_coverage_20": 0.24274297058582306, "eval_rewards/batch_coverage_25": 0.24443916976451874, "eval_rewards/batch_coverage_5": 0.28614822030067444, "eval_rewards/brier_reward": 0.7955427914857864, "eval_rewards/confidence_uniqueness_reward": 0.803955078125, "eval_rewards/format_reward": 1.0, "eval_rewards/frontier_aurc_reward": -0.003532207338139415, "eval_rewards/frontier_ece_reward": 0.010001325979828835, "eval_rewards/frontier_entropy_batch_reward": -1.0, "eval_runtime": 22.3192, "eval_samples_per_second": 22.402, "eval_signal/accuracy_reward/centered_abs_mean": 0.4737548828125, "eval_signal/accuracy_reward/group_std_mean": 0.49411213397979736, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23687744140625, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23687744140625, "eval_signal/advantage_abs_mean": 0.2209746055305004, "eval_signal/advantage_pre_scale_abs_mean": 0.2209746055305004, "eval_signal/advantage_pre_scale_std": 0.2672167122364044, "eval_signal/advantage_std": 0.2672167122364044, "eval_signal/batch_coverage_0/centered_abs_mean": 0.42352383583784103, "eval_signal/batch_coverage_0/group_std_mean": 0.48895537108182907, "eval_signal/batch_coverage_0/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.04235238581895828, "eval_signal/batch_coverage_0/weight": 0.10000000149011612, "eval_signal/batch_coverage_0/weighted_centered_abs_mean": 0.04235238581895828, "eval_signal/batch_coverage_1/centered_abs_mean": 0.42352383583784103, "eval_signal/batch_coverage_1/group_std_mean": 0.48895537108182907, "eval_signal/batch_coverage_1/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.04235238581895828, "eval_signal/batch_coverage_1/weight": 0.10000000149011612, "eval_signal/batch_coverage_1/weighted_centered_abs_mean": 0.04235238581895828, "eval_signal/batch_coverage_10/centered_abs_mean": 0.42084096372127533, "eval_signal/batch_coverage_10/group_std_mean": 0.4858129918575287, "eval_signal/batch_coverage_10/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.04208409693092108, "eval_signal/batch_coverage_10/weight": 0.10000000149011612, "eval_signal/batch_coverage_10/weighted_centered_abs_mean": 0.04208409693092108, "eval_signal/batch_coverage_15/centered_abs_mean": 0.4104515314102173, "eval_signal/batch_coverage_15/group_std_mean": 0.4733032360672951, "eval_signal/batch_coverage_15/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.04104515444487333, "eval_signal/batch_coverage_15/weight": 0.10000000149011612, "eval_signal/batch_coverage_15/weighted_centered_abs_mean": 0.04104515444487333, "eval_signal/batch_coverage_20/centered_abs_mean": 0.3303024247288704, "eval_signal/batch_coverage_20/group_std_mean": 0.38545073568820953, "eval_signal/batch_coverage_20/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.03303024219349027, "eval_signal/batch_coverage_20/weight": 0.10000000149011612, "eval_signal/batch_coverage_20/weighted_centered_abs_mean": 0.03303024219349027, "eval_signal/batch_coverage_25/centered_abs_mean": 0.33572477102279663, "eval_signal/batch_coverage_25/group_std_mean": 0.38896334171295166, "eval_signal/batch_coverage_25/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.03357247728854418, "eval_signal/batch_coverage_25/weight": 0.10000000149011612, "eval_signal/batch_coverage_25/weighted_centered_abs_mean": 0.03357247728854418, "eval_signal/batch_coverage_5/centered_abs_mean": 0.42352383583784103, "eval_signal/batch_coverage_5/group_std_mean": 0.48895537108182907, "eval_signal/batch_coverage_5/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.04235238581895828, "eval_signal/batch_coverage_5/weight": 0.10000000149011612, "eval_signal/batch_coverage_5/weighted_centered_abs_mean": 0.04235238581895828, "eval_signal/brier_reward/centered_abs_mean": 0.2786501497030258, "eval_signal/brier_reward/group_std_mean": 0.34969785809516907, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02786501543596387, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.02786501543596387, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.111419677734375, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.1263575330376625, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.011141967726871371, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.011141967726871371, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.004553930484689772, "eval_signal/frontier_aurc_reward/group_std_mean": 0.00847341027110815, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.692413469660096e-05, "eval_signal/frontier_aurc_reward/weight": 0.012500000186264515, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.692413469660096e-05, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.022189988754689693, "eval_signal/frontier_ece_reward/group_std_mean": 0.028291028458625078, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002218998968601227, "eval_signal/frontier_ece_reward/weight": 0.10000000149011612, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002218998968601227, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0, "eval_steps_per_second": 0.179, "step": 300 }, { "calibration/aurc": 0.2495020566812151, "calibration/batch_distribution_entropy": 0.6821244193032813, "calibration/buffer_distribution_entropy": 0.7306995672624862, "calibration/confidence_entropy": 0.21918015091457263, "calibration/coverage@0%": 0.008984375, "calibration/coverage@1%": 0.008984375, "calibration/coverage@10%": 0.113671875, "calibration/coverage@15%": 0.46630228718199607, "calibration/coverage@20%": 0.5499304366438356, "calibration/coverage@25%": 0.6015204562133072, "calibration/coverage@30%": 0.6527129708904109, "calibration/coverage@5%": 0.0890625, "calibration/ece": 0.16477880565297598, "calibration/mean_confidence": 0.6071220864675294, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0005859375, "completions/max_length": 664.2, "completions/max_terminated_length": 664.2, "completions/mean_length": 232.00810546875, "completions/mean_terminated_length": 232.14267272949218, "completions/min_length": 42.2, "completions/min_terminated_length": 107.4, "epoch": 0.976, "grad_norm": 0.0017184000462293625, "learning_rate": 1e-06, "loss": 0.0003, "num_tokens": 1037904258.0, "reward": 1.3052581071853637, "reward_std": 0.12845401763916015, "rewards/accuracy_reward": 0.5328125, "rewards/batch_coverage_0": 0.5998408555984497, "rewards/batch_coverage_1": 0.5998408555984497, "rewards/batch_coverage_10": 0.6285971403121948, "rewards/batch_coverage_15": 0.6305443048477173, "rewards/batch_coverage_20": 0.6343398213386535, "rewards/batch_coverage_25": 0.6352273762226105, "rewards/batch_coverage_5": 0.615449583530426, "rewards/brier_reward": 0.8177313566207886, "rewards/confidence_uniqueness_reward": 0.8184030890464783, "rewards/format_reward": 0.99921875, "rewards/frontier_aurc_reward": -0.0027989137917757033, "rewards/frontier_ece_reward": 0.012824122048914432, "rewards/frontier_entropy_batch_reward": -0.6000243067741394, "signal/accuracy_reward/centered_abs_mean": 0.07099609375, "signal/accuracy_reward/group_std_mean": 0.0958763062953949, "signal/accuracy_reward/group_zero_std_frac": 0.721875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.035498046875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.035498046875, "signal/advantage_abs_mean": 0.09721664190292359, "signal/advantage_pre_scale_abs_mean": 0.09721664190292359, "signal/advantage_pre_scale_std": 0.16938933432102204, "signal/advantage_std": 0.16938933432102204, "signal/batch_coverage_0/centered_abs_mean": 0.121105095744133, "signal/batch_coverage_0/group_std_mean": 0.16181692481040955, "signal/batch_coverage_0/group_zero_std_frac": 0.025, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.012110509909689426, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.012110509909689426, "signal/batch_coverage_1/centered_abs_mean": 0.121105095744133, "signal/batch_coverage_1/group_std_mean": 0.16181692481040955, "signal/batch_coverage_1/group_zero_std_frac": 0.025, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.012110509909689426, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.012110509909689426, "signal/batch_coverage_10/centered_abs_mean": 0.12876548916101455, "signal/batch_coverage_10/group_std_mean": 0.17223238348960876, "signal/batch_coverage_10/group_zero_std_frac": 0.025, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.012876549735665321, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.012876549735665321, "signal/batch_coverage_15/centered_abs_mean": 0.13056999146938325, "signal/batch_coverage_15/group_std_mean": 0.17412794828414918, "signal/batch_coverage_15/group_zero_std_frac": 0.025, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.013056999444961548, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.013056999444961548, "signal/batch_coverage_20/centered_abs_mean": 0.13263879269361495, "signal/batch_coverage_20/group_std_mean": 0.17653416395187377, "signal/batch_coverage_20/group_zero_std_frac": 0.025, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.013263879343867302, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.013263879343867302, "signal/batch_coverage_25/centered_abs_mean": 0.13004723638296128, "signal/batch_coverage_25/group_std_mean": 0.17350882589817046, "signal/batch_coverage_25/group_zero_std_frac": 0.025, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.013004723750054837, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.013004723750054837, "signal/batch_coverage_5/centered_abs_mean": 0.1247675284743309, "signal/batch_coverage_5/group_std_mean": 0.16621632874011993, "signal/batch_coverage_5/group_zero_std_frac": 0.025, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.012476752884685994, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.012476752884685994, "signal/brier_reward/centered_abs_mean": 0.10762623548507691, "signal/brier_reward/group_std_mean": 0.14009990394115449, "signal/brier_reward/group_zero_std_frac": 0.025, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010762624442577362, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010762624442577362, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.07212853208184242, "signal/confidence_uniqueness_reward/group_std_mean": 0.09378160238265991, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.025, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007212853524833918, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007212853524833918, "signal/format_reward/centered_abs_mean": 0.00142822265625, "signal/format_reward/group_std_mean": 0.002923433808609843, "signal/format_reward/group_zero_std_frac": 0.9875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000714111328125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000714111328125, "signal/frontier_aurc_reward/centered_abs_mean": 0.0014352105092257262, "signal/frontier_aurc_reward/group_std_mean": 0.0020134341903030872, "signal/frontier_aurc_reward/group_zero_std_frac": 0.003125, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.794013242033543e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.794013242033543e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.013121502846479416, "signal/frontier_ece_reward/group_std_mean": 0.01770468596369028, "signal/frontier_ece_reward/group_zero_std_frac": 0.06875, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0013121502939611672, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0013121502939611672, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27344033122062683, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3469135522842407, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.171875, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02734403349459171, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02734403349459171, "step": 305 }, { "calibration/aurc": 0.36944233718005665, "calibration/batch_distribution_entropy": 0.6843551315294027, "calibration/buffer_distribution_entropy": 0.729047477608188, "calibration/confidence_entropy": 0.21484423873154287, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.031640625, "calibration/coverage@15%": 0.08125, "calibration/coverage@20%": 0.1671875, "calibration/coverage@25%": 0.355078125, "calibration/coverage@30%": 0.440625, "calibration/coverage@5%": 0.031640625, "calibration/ece": 0.21733653989218463, "calibration/mean_confidence": 0.5253131950893684, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 743.0, "completions/max_terminated_length": 743.0, "completions/mean_length": 229.2525390625, "completions/mean_terminated_length": 229.2525390625, "completions/min_length": 102.8, "completions/min_terminated_length": 102.8, "epoch": 0.992, "grad_norm": 0.0018200714839622378, "learning_rate": 1e-06, "loss": -0.001, "num_tokens": 1055380284.0, "reward": 1.2748222351074219, "reward_std": 0.12597642987966537, "rewards/accuracy_reward": 0.5037109375, "rewards/batch_coverage_0": 0.5769623160362244, "rewards/batch_coverage_1": 0.5769623160362244, "rewards/batch_coverage_10": 0.6048051595687867, "rewards/batch_coverage_15": 0.6059326887130737, "rewards/batch_coverage_20": 0.6096433520317077, "rewards/batch_coverage_25": 0.6107012748718261, "rewards/batch_coverage_5": 0.5928257584571839, "rewards/brier_reward": 0.7898470520973205, "rewards/confidence_uniqueness_reward": 0.8456288576126099, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.0027298410423099995, "rewards/frontier_ece_reward": 0.01039378009736538, "rewards/frontier_entropy_batch_reward": -0.5927170753479004, "signal/accuracy_reward/centered_abs_mean": 0.06654052734375, "signal/accuracy_reward/group_std_mean": 0.08761050999164581, "signal/accuracy_reward/group_zero_std_frac": 0.75, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.033270263671875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.033270263671875, "signal/advantage_abs_mean": 0.09469048231840134, "signal/advantage_pre_scale_abs_mean": 0.09469048231840134, "signal/advantage_pre_scale_std": 0.16549064517021178, "signal/advantage_std": 0.16549064517021178, "signal/batch_coverage_0/centered_abs_mean": 0.1224028378725052, "signal/batch_coverage_0/group_std_mean": 0.16056158244609833, "signal/batch_coverage_0/group_zero_std_frac": 0.009375, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.012240284122526646, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.012240284122526646, "signal/batch_coverage_1/centered_abs_mean": 0.1224028378725052, "signal/batch_coverage_1/group_std_mean": 0.16056158244609833, "signal/batch_coverage_1/group_zero_std_frac": 0.009375, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.012240284122526646, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.012240284122526646, "signal/batch_coverage_10/centered_abs_mean": 0.12704037725925446, "signal/batch_coverage_10/group_std_mean": 0.16898048520088196, "signal/batch_coverage_10/group_zero_std_frac": 0.009375, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.012704038433730603, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.012704038433730603, "signal/batch_coverage_15/centered_abs_mean": 0.12583906352519988, "signal/batch_coverage_15/group_std_mean": 0.16771935522556305, "signal/batch_coverage_15/group_zero_std_frac": 0.009375, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.012583906389772892, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.012583906389772892, "signal/batch_coverage_20/centered_abs_mean": 0.12735906690359117, "signal/batch_coverage_20/group_std_mean": 0.17019020318984984, "signal/batch_coverage_20/group_zero_std_frac": 0.009375, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.01273590698838234, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.01273590698838234, "signal/batch_coverage_25/centered_abs_mean": 0.12817565351724625, "signal/batch_coverage_25/group_std_mean": 0.1711154282093048, "signal/batch_coverage_25/group_zero_std_frac": 0.009375, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.012817565724253655, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.012817565724253655, "signal/batch_coverage_5/centered_abs_mean": 0.12552004754543306, "signal/batch_coverage_5/group_std_mean": 0.16624825298786164, "signal/batch_coverage_5/group_zero_std_frac": 0.009375, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.012552005238831043, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.012552005238831043, "signal/brier_reward/centered_abs_mean": 0.10650904327630997, "signal/brier_reward/group_std_mean": 0.13855279684066774, "signal/brier_reward/group_zero_std_frac": 0.009375, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010650904849171638, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010650904849171638, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.06525479182600975, "signal/confidence_uniqueness_reward/group_std_mean": 0.08299526795744896, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.009375, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0065254792105406524, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0065254792105406524, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0015120732598006726, "signal/frontier_aurc_reward/group_std_mean": 0.0020327494712546468, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.890091589302756e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.890091589302756e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.01312310602515936, "signal/frontier_ece_reward/group_std_mean": 0.0178884819149971, "signal/frontier_ece_reward/group_zero_std_frac": 0.028125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001312310597859323, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001312310597859323, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.28517351746559144, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3595928966999054, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.134375, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.028517350926995276, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.028517350926995276, "step": 310 }, { "calibration/aurc": 0.2572230256137267, "calibration/batch_distribution_entropy": 0.6403200916464951, "calibration/buffer_distribution_entropy": 0.727542698126519, "calibration/confidence_entropy": 0.20421884610236768, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0390625, "calibration/coverage@15%": 0.1025390625, "calibration/coverage@20%": 0.30078125, "calibration/coverage@25%": 0.599609375, "calibration/coverage@30%": 0.734375, "calibration/coverage@5%": 0.0390625, "calibration/ece": 0.18167404731060102, "calibration/mean_confidence": 0.6568276885579987, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00031887755102039117, "completions/max_length": 508.0, "completions/max_terminated_length": 508.0, "completions/mean_length": 228.7561492919922, "completions/mean_terminated_length": 228.83031463623047, "completions/min_length": 55.5, "completions/min_terminated_length": 106.0, "epoch": 0.9984, "num_tokens": 1062317684.0, "reward": 1.2654950618743896, "reward_std": 0.13095472753047943, "rewards/accuracy_reward": 0.535400390625, "rewards/batch_coverage_0": 0.5433026552200317, "rewards/batch_coverage_1": 0.5433026552200317, "rewards/batch_coverage_10": 0.5609183311462402, "rewards/batch_coverage_15": 0.5650316774845123, "rewards/batch_coverage_20": 0.5628845393657684, "rewards/batch_coverage_25": 0.5658237636089325, "rewards/batch_coverage_5": 0.5589036047458649, "rewards/brier_reward": 0.7723598182201385, "rewards/confidence_uniqueness_reward": 0.8834567368030548, "rewards/format_reward": 0.999755859375, "rewards/frontier_aurc_reward": -0.002115529146976769, "rewards/frontier_ece_reward": 0.01011998625472188, "rewards/frontier_entropy_batch_reward": -0.586669921875, "signal/accuracy_reward/centered_abs_mean": 0.0801849365234375, "signal/accuracy_reward/group_std_mean": 0.10365201532840729, "signal/accuracy_reward/group_zero_std_frac": 0.7109375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04009246826171875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04009246826171875, "signal/advantage_abs_mean": 0.10032283142209053, "signal/advantage_pre_scale_abs_mean": 0.10032283142209053, "signal/advantage_pre_scale_std": 0.16637051850557327, "signal/advantage_std": 0.16637051850557327, "signal/batch_coverage_0/centered_abs_mean": 0.1333833485841751, "signal/batch_coverage_0/group_std_mean": 0.17518048733472824, "signal/batch_coverage_0/group_zero_std_frac": 0.0078125, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.013338335324078798, "signal/batch_coverage_0/weight": 0.10000000149011612, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.013338335324078798, "signal/batch_coverage_1/centered_abs_mean": 0.1333833485841751, "signal/batch_coverage_1/group_std_mean": 0.17518048733472824, "signal/batch_coverage_1/group_zero_std_frac": 0.0078125, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.013338335324078798, "signal/batch_coverage_1/weight": 0.10000000149011612, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.013338335324078798, "signal/batch_coverage_10/centered_abs_mean": 0.13491075485944748, "signal/batch_coverage_10/group_std_mean": 0.17798280715942383, "signal/batch_coverage_10/group_zero_std_frac": 0.0078125, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.013491075951606035, "signal/batch_coverage_10/weight": 0.10000000149011612, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.013491075951606035, "signal/batch_coverage_15/centered_abs_mean": 0.13440050929784775, "signal/batch_coverage_15/group_std_mean": 0.1781018227338791, "signal/batch_coverage_15/group_zero_std_frac": 0.0078125, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.01344005111604929, "signal/batch_coverage_15/weight": 0.10000000149011612, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.01344005111604929, "signal/batch_coverage_20/centered_abs_mean": 0.13126233220100403, "signal/batch_coverage_20/group_std_mean": 0.1750546544790268, "signal/batch_coverage_20/group_zero_std_frac": 0.0078125, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.013126233592629433, "signal/batch_coverage_20/weight": 0.10000000149011612, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.013126233592629433, "signal/batch_coverage_25/centered_abs_mean": 0.1286875493824482, "signal/batch_coverage_25/group_std_mean": 0.17355988919734955, "signal/batch_coverage_25/group_zero_std_frac": 0.0078125, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.012868755962699652, "signal/batch_coverage_25/weight": 0.10000000149011612, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.012868755962699652, "signal/batch_coverage_5/centered_abs_mean": 0.13534656167030334, "signal/batch_coverage_5/group_std_mean": 0.17881041020154953, "signal/batch_coverage_5/group_zero_std_frac": 0.0078125, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.013534656260162592, "signal/batch_coverage_5/weight": 0.10000000149011612, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.013534656260162592, "signal/brier_reward/centered_abs_mean": 0.11168951913714409, "signal/brier_reward/group_std_mean": 0.1451142355799675, "signal/brier_reward/group_zero_std_frac": 0.0078125, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011168952565640211, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011168952565640211, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.050195975229144096, "signal/confidence_uniqueness_reward/group_std_mean": 0.06316389329731464, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.015625, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005019597476348281, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005019597476348281, "signal/format_reward/centered_abs_mean": 0.0004730224609375, "signal/format_reward/group_std_mean": 0.0013810679083690047, "signal/format_reward/group_zero_std_frac": 0.9921875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00023651123046875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00023651123046875, "signal/frontier_aurc_reward/centered_abs_mean": 0.0011146239121444523, "signal/frontier_aurc_reward/group_std_mean": 0.0015803179703652859, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.3932799447502475e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.3932799447502475e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.014482187572866678, "signal/frontier_ece_reward/group_std_mean": 0.01911458931863308, "signal/frontier_ece_reward/group_zero_std_frac": 0.0234375, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001448218827135861, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001448218827135861, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2982959598302841, "signal/frontier_entropy_batch_reward/group_std_mean": 0.36800524592399597, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.1484375, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.029829598031938076, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.029829598031938076, "step": 312, "total_flos": 0.0, "train_loss": -0.0003725691671999625, "train_runtime": 61071.055, "train_samples_per_second": 0.327, "train_steps_per_second": 0.005 } ], "logging_steps": 5, "max_steps": 312, "num_input_tokens_seen": 1062317684, "num_train_epochs": 1, "save_steps": 60, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }