{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.21333333333333335, "eval_steps": 500, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "aux_brier/lambda": 0.25, "aux_brier/loss": 5.791089203391117e-07, "aux_brier/mean_group_std": 0.06289231620091193, "aux_brier/mean_r": 0.4665906001184907, "aux_brier/n_active_tok": 24.615384615384617, "aux_brier/n_groups": 5.3076923076923075, "aux_brier/n_step_records": 6.153846153846154, "calib/answer_extract_rate": 0.08203125, "calib/auroc": 0.6944444444444445, "calib/avg_num_step_conf": 0.3359375, "calib/ece": 0.6230769230769231, "calib/final_conf_rate": 0.05078125, "calib/format_rate": 0.04296875, "calib/frac_conf_gt_0.9": 0.7692307692307693, "calib/gap": 0.03861111111111115, "calib/mean_conf": 0.9307692307692309, "calib/mu_c": 0.9575, "calib/mu_w": 0.9188888888888889, "calib/nonempty_final_conf_rate": 0.05078125, "calib/nonempty_reasoning_rate": 0.09765625, "calib/nonempty_step_conf_rate": 0.0703125, "calib/pce": 0.6230769230769231, "calib/std_conf": 0.07965903671384378, "calib/step_conf_rate": 0.0703125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08984375, "completions/max_length": 2955.0, "completions/max_terminated_length": 2955.0, "completions/mean_length": 613.67578125, "completions/mean_terminated_length": 674.2532348632812, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0010666666666666667, "grad_norm": 0.8019090294837952, "learning_rate": 2.5000000000000004e-07, "loss": 0.0318, "num_tokens": 264685.0, "reward": 0.04124843701720238, "reward_std": 0.0838509351015091, "rewards/accuracy_reward_step": 0.015625, "rewards/final_brier_reward_step": 0.01655624993145466, "rewards/format_reward_step": 0.04296875, "step": 1 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 2.461345396504181e-08, "aux_brier/mean_group_std": 0.046398653263787254, "aux_brier/mean_r": 0.430243897442093, "aux_brier/n_active_tok": 28.42105263157895, "aux_brier/n_groups": 5.894736842105263, "aux_brier/n_step_records": 7.105263157894737, "calib/answer_extract_rate": 0.13671875, "calib/auroc": 0.5338345864661654, "calib/avg_num_step_conf": 0.55078125, "calib/ece": 0.6261538461538463, "calib/final_conf_rate": 0.1015625, "calib/format_rate": 0.08984375, "calib/frac_conf_gt_0.9": 0.7692307692307693, "calib/gap": 0.002406015037593856, "calib/mean_conf": 0.8953846153846153, "calib/mu_c": 0.897142857142857, "calib/mu_w": 0.8947368421052632, "calib/nonempty_final_conf_rate": 0.1015625, "calib/nonempty_reasoning_rate": 0.14453125, "calib/nonempty_step_conf_rate": 0.109375, "calib/pce": 0.6261538461538463, "calib/std_conf": 0.18653172073466937, "calib/step_conf_rate": 0.109375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0546875, "completions/max_length": 3001.0, "completions/max_terminated_length": 3001.0, "completions/mean_length": 646.4609375, "completions/mean_terminated_length": 683.8594970703125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0021333333333333334, "grad_norm": 0.007053256966173649, "learning_rate": 5.000000000000001e-07, "loss": 0.0643, "num_tokens": 533467.0, "reward": 0.08358447253704071, "reward_std": 0.15892045199871063, "rewards/accuracy_reward_step": 0.03125, "rewards/final_brier_reward_step": 0.02965039201080799, "rewards/format_reward_step": 0.08984375, "step": 2 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 1.4675386239559884e-08, "aux_brier/mean_group_std": 0.010562199023242428, "aux_brier/mean_r": 0.4475614425895778, "aux_brier/n_active_tok": 22.666666666666668, "aux_brier/n_groups": 4.833333333333333, "aux_brier/n_step_records": 5.666666666666667, "calib/answer_extract_rate": 0.04296875, "calib/auroc": 0.625, "calib/avg_num_step_conf": 0.26953125, "calib/ece": 0.6136363636363635, "calib/final_conf_rate": 0.04296875, "calib/format_rate": 0.03125, "calib/frac_conf_gt_0.9": 0.6363636363636364, "calib/gap": 0.10124999999999995, "calib/mean_conf": 0.8863636363636364, "calib/mu_c": 0.96, "calib/mu_w": 0.85875, "calib/nonempty_final_conf_rate": 0.04296875, "calib/nonempty_reasoning_rate": 0.06640625, "calib/nonempty_step_conf_rate": 0.05859375, "calib/pce": 0.6136363636363635, "calib/std_conf": 0.14169575134919613, "calib/step_conf_rate": 0.05859375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05859375, "completions/max_length": 3062.0, "completions/max_terminated_length": 3062.0, "completions/mean_length": 726.01171875, "completions/mean_terminated_length": 771.19921875, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0032, "grad_norm": 0.26548826694488525, "learning_rate": 7.5e-07, "loss": 0.0159, "num_tokens": 824582.0, "reward": 0.03019765578210354, "reward_std": 0.05533502995967865, "rewards/accuracy_reward_step": 0.01171875, "rewards/final_brier_reward_step": 0.011415624991059303, "rewards/format_reward_step": 0.03125, "step": 3 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 1.966125440233668e-08, "aux_brier/mean_group_std": 0.026093932111437953, "aux_brier/mean_r": 0.5140029739995847, "aux_brier/n_active_tok": 22.666666666666668, "aux_brier/n_groups": 4.833333333333333, "aux_brier/n_step_records": 5.666666666666667, "calib/answer_extract_rate": 0.06640625, "calib/auroc": 0.7727272727272727, "calib/avg_num_step_conf": 0.27734375, "calib/ece": 0.7430769230769232, "calib/final_conf_rate": 0.05078125, "calib/format_rate": 0.0390625, "calib/frac_conf_gt_0.9": 0.7692307692307693, "calib/gap": 0.09818181818181826, "calib/mean_conf": 0.8969230769230769, "calib/mu_c": 0.98, "calib/mu_w": 0.8818181818181817, "calib/nonempty_final_conf_rate": 0.05078125, "calib/nonempty_reasoning_rate": 0.07421875, "calib/nonempty_step_conf_rate": 0.05859375, "calib/pce": 0.7430769230769232, "calib/std_conf": 0.20521600684999117, "calib/step_conf_rate": 0.05859375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05859375, "completions/max_length": 3048.0, "completions/max_terminated_length": 3048.0, "completions/mean_length": 709.09375, "completions/mean_terminated_length": 753.228271484375, "completions/min_length": 0.0, "completions/min_terminated_length": 5.0, "epoch": 0.004266666666666667, "grad_norm": 0.003563561709597707, "learning_rate": 1.0000000000000002e-06, "loss": 0.0307, "num_tokens": 1112278.0, "reward": 0.030818065628409386, "reward_std": 0.07425656169652939, "rewards/accuracy_reward_step": 0.0078125, "rewards/final_brier_reward_step": 0.013897265307605267, "rewards/format_reward_step": 0.0390625, "step": 4 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -1.3686993069781137e-08, "aux_brier/mean_group_std": 0.04379681054330711, "aux_brier/mean_r": 0.42881758133179143, "aux_brier/n_active_tok": 32.888888888888886, "aux_brier/n_groups": 5.888888888888889, "aux_brier/n_step_records": 8.222222222222221, "calib/answer_extract_rate": 0.046875, "calib/auroc": 0.6190476190476191, "calib/avg_num_step_conf": 0.29296875, "calib/ece": 0.565, "calib/final_conf_rate": 0.0390625, "calib/format_rate": 0.03515625, "calib/frac_conf_gt_0.9": 0.7, "calib/gap": 0.1461904761904762, "calib/mean_conf": 0.841, "calib/mu_c": 0.9433333333333334, "calib/mu_w": 0.7971428571428572, "calib/nonempty_final_conf_rate": 0.0390625, "calib/nonempty_reasoning_rate": 0.0625, "calib/nonempty_step_conf_rate": 0.0546875, "calib/pce": 0.5529999999999999, "calib/std_conf": 0.2564546743578678, "calib/step_conf_rate": 0.0546875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.10546875, "completions/max_length": 2843.0, "completions/max_terminated_length": 2843.0, "completions/mean_length": 610.21875, "completions/mean_terminated_length": 682.1659545898438, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.005333333333333333, "grad_norm": 0.00464936438947916, "learning_rate": 1.25e-06, "loss": 0.0103, "num_tokens": 1375182.0, "reward": 0.03314834088087082, "reward_std": 0.06054586544632912, "rewards/accuracy_reward_step": 0.01171875, "rewards/final_brier_reward_step": 0.015405858866870403, "rewards/format_reward_step": 0.03515625, "step": 5 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 1.232474389212257e-08, "aux_brier/mean_group_std": 0.013661028843392754, "aux_brier/mean_r": 0.4007950204987466, "aux_brier/n_active_tok": 28.533333333333335, "aux_brier/n_groups": 6.2, "aux_brier/n_step_records": 7.133333333333334, "calib/answer_extract_rate": 0.11328125, "calib/auroc": 0.0625, "calib/avg_num_step_conf": 0.49609375, "calib/ece": 0.9123529411764707, "calib/final_conf_rate": 0.06640625, "calib/format_rate": 0.0546875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.022499999999999964, "calib/mean_conf": 0.9711764705882354, "calib/mu_c": 0.95, "calib/mu_w": 0.9724999999999999, "calib/nonempty_final_conf_rate": 0.06640625, "calib/nonempty_reasoning_rate": 0.125, "calib/nonempty_step_conf_rate": 0.08203125, "calib/pce": 0.9123529411764707, "calib/std_conf": 0.019668533526235873, "calib/step_conf_rate": 0.08203125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05859375, "completions/max_length": 2834.0, "completions/max_terminated_length": 2834.0, "completions/mean_length": 598.41015625, "completions/mean_terminated_length": 635.6556396484375, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0064, "grad_norm": 0.05702491104602814, "learning_rate": 1.5e-06, "loss": -0.0057, "num_tokens": 1634327.0, "reward": 0.03291259706020355, "reward_std": 0.07377687096595764, "rewards/accuracy_reward_step": 0.00390625, "rewards/final_brier_reward_step": 0.0066503905691206455, "rewards/format_reward_step": 0.0546875, "step": 6 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -3.07054222048464e-09, "aux_brier/mean_group_std": 0.03345712440611508, "aux_brier/mean_r": 0.45309954942507086, "aux_brier/n_active_tok": 22.133333333333333, "aux_brier/n_groups": 4.8, "aux_brier/n_step_records": 5.533333333333333, "calib/answer_extract_rate": 0.08203125, "calib/auroc": 0.33333333333333337, "calib/avg_num_step_conf": 0.34765625, "calib/ece": 0.7453333333333334, "calib/final_conf_rate": 0.05859375, "calib/format_rate": 0.04296875, "calib/frac_conf_gt_0.9": 0.8666666666666667, "calib/gap": -0.006666666666666599, "calib/mean_conf": 0.9453333333333334, "calib/mu_c": 0.9400000000000001, "calib/mu_w": 0.9466666666666667, "calib/nonempty_final_conf_rate": 0.05859375, "calib/nonempty_reasoning_rate": 0.109375, "calib/nonempty_step_conf_rate": 0.078125, "calib/pce": 0.7453333333333334, "calib/std_conf": 0.05667058809960908, "calib/step_conf_rate": 0.078125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.13671875, "completions/max_length": 3054.0, "completions/max_terminated_length": 3054.0, "completions/mean_length": 657.5234375, "completions/mean_terminated_length": 761.6561279296875, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.007466666666666667, "grad_norm": 0.9773487448692322, "learning_rate": 1.75e-06, "loss": 0.0245, "num_tokens": 1910077.0, "reward": 0.03702617064118385, "reward_std": 0.09901833534240723, "rewards/accuracy_reward_step": 0.01171875, "rewards/final_brier_reward_step": 0.015292187221348286, "rewards/format_reward_step": 0.04296875, "step": 7 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -8.059045120134556e-08, "aux_brier/mean_group_std": 0.04683999810883638, "aux_brier/mean_r": 0.5308816023273762, "aux_brier/n_active_tok": 26.0, "aux_brier/n_groups": 5.083333333333333, "aux_brier/n_step_records": 6.5, "calib/answer_extract_rate": 0.0703125, "calib/auroc": 0.796875, "calib/avg_num_step_conf": 0.3046875, "calib/ece": 0.4618750000000001, "calib/final_conf_rate": 0.0625, "calib/format_rate": 0.05078125, "calib/frac_conf_gt_0.9": 0.75, "calib/gap": 0.03874999999999995, "calib/mean_conf": 0.836875, "calib/mu_c": 0.85625, "calib/mu_w": 0.8175, "calib/nonempty_final_conf_rate": 0.0625, "calib/nonempty_reasoning_rate": 0.08203125, "calib/nonempty_step_conf_rate": 0.06640625, "calib/pce": 0.3993750000000001, "calib/std_conf": 0.2816185085803133, "calib/step_conf_rate": 0.06640625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08203125, "completions/max_length": 2851.0, "completions/max_terminated_length": 2851.0, "completions/mean_length": 576.5703125, "completions/mean_terminated_length": 628.0935668945312, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.008533333333333334, "grad_norm": 0.23815672099590302, "learning_rate": 2.0000000000000003e-06, "loss": 0.0261, "num_tokens": 2164191.0, "reward": 0.06468446552753448, "reward_std": 0.1265413910150528, "rewards/accuracy_reward_step": 0.03125, "rewards/final_brier_reward_step": 0.03217539191246033, "rewards/format_reward_step": 0.05078125, "step": 8 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 9.461055149194584e-10, "aux_brier/mean_group_std": 0.015730202045537118, "aux_brier/mean_r": 0.43023866184339343, "aux_brier/n_active_tok": 28.444444444444443, "aux_brier/n_groups": 6.777777777777778, "aux_brier/n_step_records": 7.111111111111111, "calib/answer_extract_rate": 0.0625, "calib/auroc": 0.9444444444444444, "calib/avg_num_step_conf": 0.2578125, "calib/ece": 0.7490000000000001, "calib/final_conf_rate": 0.0390625, "calib/format_rate": 0.02734375, "calib/frac_conf_gt_0.9": 0.8, "calib/gap": 0.14555555555555555, "calib/mean_conf": 0.849, "calib/mu_c": 0.98, "calib/mu_w": 0.8344444444444444, "calib/nonempty_final_conf_rate": 0.0390625, "calib/nonempty_reasoning_rate": 0.06640625, "calib/nonempty_step_conf_rate": 0.04296875, "calib/pce": 0.7490000000000001, "calib/std_conf": 0.28727861041156544, "calib/step_conf_rate": 0.04296875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.109375, "completions/max_length": 2978.0, "completions/max_terminated_length": 2978.0, "completions/mean_length": 613.953125, "completions/mean_terminated_length": 689.3508911132812, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0096, "grad_norm": 0.005154241342097521, "learning_rate": 2.25e-06, "loss": -0.0141, "num_tokens": 2428899.0, "reward": 0.018406933173537254, "reward_std": 0.04806169122457504, "rewards/accuracy_reward_step": 0.00390625, "rewards/final_brier_reward_step": 0.0033152345567941666, "rewards/format_reward_step": 0.02734375, "step": 9 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -3.5254254793226636e-08, "aux_brier/mean_group_std": 0.04791594090406018, "aux_brier/mean_r": 0.4576816674277268, "aux_brier/n_active_tok": 24.75, "aux_brier/n_groups": 4.875, "aux_brier/n_step_records": 6.1875, "calib/answer_extract_rate": 0.11328125, "calib/auroc": 0.6086956521739131, "calib/avg_num_step_conf": 0.421875, "calib/ece": 0.8261875000000001, "calib/final_conf_rate": 0.09375, "calib/format_rate": 0.0546875, "calib/frac_conf_gt_0.9": 0.6666666666666666, "calib/gap": 0.09615217391304343, "calib/mean_conf": 0.8678541666666666, "calib/mu_c": 0.96, "calib/mu_w": 0.8638478260869565, "calib/nonempty_final_conf_rate": 0.09375, "calib/nonempty_reasoning_rate": 0.1328125, "calib/nonempty_step_conf_rate": 0.09375, "calib/pce": 0.8261875000000001, "calib/std_conf": 0.2272346199033917, "calib/step_conf_rate": 0.09375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05859375, "completions/max_length": 3017.0, "completions/max_terminated_length": 3017.0, "completions/mean_length": 668.12109375, "completions/mean_terminated_length": 709.7054443359375, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.010666666666666666, "grad_norm": 1.2165741920471191, "learning_rate": 2.5e-06, "loss": 0.0376, "num_tokens": 2706738.0, "reward": 0.03873037174344063, "reward_std": 0.08526559174060822, "rewards/accuracy_reward_step": 0.0078125, "rewards/final_brier_reward_step": 0.014296485111117363, "rewards/format_reward_step": 0.0546875, "step": 10 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -1.191568579338384e-06, "aux_brier/mean_group_std": 0.08926198206335412, "aux_brier/mean_r": 0.382425037350804, "aux_brier/n_active_tok": 30.105263157894736, "aux_brier/n_groups": 5.315789473684211, "aux_brier/n_step_records": 7.526315789473684, "calib/answer_extract_rate": 0.15625, "calib/auroc": 0.5607142857142857, "calib/avg_num_step_conf": 0.5703125, "calib/ece": 0.7930303030303029, "calib/final_conf_rate": 0.12890625, "calib/format_rate": 0.1015625, "calib/frac_conf_gt_0.9": 0.8484848484848485, "calib/gap": 0.011142857142857121, "calib/mean_conf": 0.9445454545454545, "calib/mu_c": 0.954, "calib/mu_w": 0.9428571428571428, "calib/nonempty_final_conf_rate": 0.12890625, "calib/nonempty_reasoning_rate": 0.171875, "calib/nonempty_step_conf_rate": 0.12109375, "calib/pce": 0.7930303030303029, "calib/std_conf": 0.04710342643979464, "calib/step_conf_rate": 0.12109375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09375, "completions/max_length": 2967.0, "completions/max_terminated_length": 2967.0, "completions/mean_length": 628.97265625, "completions/mean_terminated_length": 694.038818359375, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.011733333333333333, "grad_norm": 0.3624080419540405, "learning_rate": 2.7500000000000004e-06, "loss": 0.0328, "num_tokens": 2972235.0, "reward": 0.07586494088172913, "reward_std": 0.13519909977912903, "rewards/accuracy_reward_step": 0.01953125, "rewards/final_brier_reward_step": 0.022209765389561653, "rewards/format_reward_step": 0.1015625, "step": 11 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 1.0138040675961317e-08, "aux_brier/mean_group_std": 0.08816123498156225, "aux_brier/mean_r": 0.5617093218650877, "aux_brier/n_active_tok": 36.0, "aux_brier/n_groups": 5.368421052631579, "aux_brier/n_step_records": 9.0, "calib/answer_extract_rate": 0.16796875, "calib/auroc": 0.20238095238095238, "calib/avg_num_step_conf": 0.703125, "calib/ece": 0.7838235294117647, "calib/final_conf_rate": 0.1328125, "calib/format_rate": 0.10546875, "calib/frac_conf_gt_0.9": 0.8529411764705882, "calib/gap": -0.07119047619047625, "calib/mean_conf": 0.9302941176470588, "calib/mu_c": 0.8716666666666666, "calib/mu_w": 0.9428571428571428, "calib/nonempty_final_conf_rate": 0.1328125, "calib/nonempty_reasoning_rate": 0.19921875, "calib/nonempty_step_conf_rate": 0.1484375, "calib/pce": 0.7688235294117648, "calib/std_conf": 0.07946457938706168, "calib/step_conf_rate": 0.1484375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0703125, "completions/max_length": 3066.0, "completions/max_terminated_length": 3066.0, "completions/mean_length": 601.875, "completions/mean_terminated_length": 647.39501953125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0128, "grad_norm": 1.0927518606185913, "learning_rate": 3e-06, "loss": 0.0794, "num_tokens": 3230491.0, "reward": 0.08419764786958694, "reward_std": 0.1351093053817749, "rewards/accuracy_reward_step": 0.0234375, "rewards/final_brier_reward_step": 0.032103125005960464, "rewards/format_reward_step": 0.10546875, "step": 12 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 6.745897776186085e-08, "aux_brier/mean_group_std": 0.030241205883298414, "aux_brier/mean_r": 0.3736955558950782, "aux_brier/n_active_tok": 24.444444444444443, "aux_brier/n_groups": 4.722222222222222, "aux_brier/n_step_records": 6.111111111111111, "calib/answer_extract_rate": 0.1171875, "calib/auroc": 0.4789473684210526, "calib/avg_num_step_conf": 0.45703125, "calib/ece": 0.7216666666666666, "calib/final_conf_rate": 0.09375, "calib/format_rate": 0.07421875, "calib/frac_conf_gt_0.9": 0.875, "calib/gap": 0.030315789473683963, "calib/mean_conf": 0.93, "calib/mu_c": 0.954, "calib/mu_w": 0.923684210526316, "calib/nonempty_final_conf_rate": 0.09375, "calib/nonempty_reasoning_rate": 0.1328125, "calib/nonempty_step_conf_rate": 0.09765625, "calib/pce": 0.7216666666666666, "calib/std_conf": 0.08883505314157619, "calib/step_conf_rate": 0.09765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 2781.0, "completions/max_terminated_length": 2781.0, "completions/mean_length": 578.16796875, "completions/mean_terminated_length": 627.165283203125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.013866666666666666, "grad_norm": 0.3745361864566803, "learning_rate": 3.2500000000000002e-06, "loss": -0.0048, "num_tokens": 3483094.0, "reward": 0.062227047979831696, "reward_std": 0.13563629984855652, "rewards/accuracy_reward_step": 0.01953125, "rewards/final_brier_reward_step": 0.022345703095197678, "rewards/format_reward_step": 0.07421875, "step": 13 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -7.824561436553523e-07, "aux_brier/mean_group_std": 0.0509677167700097, "aux_brier/mean_r": 0.3953121614686363, "aux_brier/n_active_tok": 32.285714285714285, "aux_brier/n_groups": 5.571428571428571, "aux_brier/n_step_records": 8.071428571428571, "calib/answer_extract_rate": 0.1875, "calib/auroc": 0.6645161290322581, "calib/avg_num_step_conf": 0.9140625, "calib/ece": 0.782222222222222, "calib/final_conf_rate": 0.140625, "calib/format_rate": 0.12109375, "calib/frac_conf_gt_0.9": 0.7777777777777778, "calib/gap": 0.05909677419354864, "calib/mean_conf": 0.9211111111111112, "calib/mu_c": 0.9720000000000001, "calib/mu_w": 0.9129032258064514, "calib/nonempty_final_conf_rate": 0.140625, "calib/nonempty_reasoning_rate": 0.2265625, "calib/nonempty_step_conf_rate": 0.1796875, "calib/pce": 0.782222222222222, "calib/std_conf": 0.11413388283010675, "calib/step_conf_rate": 0.1796875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 2817.0, "completions/max_terminated_length": 2817.0, "completions/mean_length": 643.81640625, "completions/mean_terminated_length": 686.737548828125, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.014933333333333333, "grad_norm": 0.3816582262516022, "learning_rate": 3.5e-06, "loss": 0.022, "num_tokens": 3753311.0, "reward": 0.0880625993013382, "reward_std": 0.16737763583660126, "rewards/accuracy_reward_step": 0.01953125, "rewards/final_brier_reward_step": 0.031937889754772186, "rewards/format_reward_step": 0.12109375, "step": 14 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -5.788997709183158e-08, "aux_brier/mean_group_std": 0.06431303721101182, "aux_brier/mean_r": 0.494478371672257, "aux_brier/n_active_tok": 34.22222222222222, "aux_brier/n_groups": 6.0, "aux_brier/n_step_records": 8.555555555555555, "calib/answer_extract_rate": 0.1640625, "calib/auroc": 0.41538461538461546, "calib/avg_num_step_conf": 0.90234375, "calib/ece": 0.7741935483870968, "calib/final_conf_rate": 0.12109375, "calib/format_rate": 0.10546875, "calib/frac_conf_gt_0.9": 0.6451612903225806, "calib/gap": -0.06784615384615378, "calib/mean_conf": 0.9129032258064514, "calib/mu_c": 0.8560000000000001, "calib/mu_w": 0.9238461538461539, "calib/nonempty_final_conf_rate": 0.12109375, "calib/nonempty_reasoning_rate": 0.22265625, "calib/nonempty_step_conf_rate": 0.17578125, "calib/pce": 0.7629032258064516, "calib/std_conf": 0.07788302015397096, "calib/step_conf_rate": 0.17578125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 2918.0, "completions/max_terminated_length": 2918.0, "completions/mean_length": 580.35546875, "completions/mean_terminated_length": 619.0458374023438, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.016, "grad_norm": 0.12358175963163376, "learning_rate": 3.7500000000000005e-06, "loss": 0.0893, "num_tokens": 4009762.0, "reward": 0.08004707098007202, "reward_std": 0.1672627329826355, "rewards/accuracy_reward_step": 0.01953125, "rewards/final_brier_reward_step": 0.031125782057642937, "rewards/format_reward_step": 0.10546875, "step": 15 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 6.605632350200417e-08, "aux_brier/mean_group_std": 0.10973796878610398, "aux_brier/mean_r": 0.5119668618019643, "aux_brier/n_active_tok": 38.0, "aux_brier/n_groups": 5.7727272727272725, "aux_brier/n_step_records": 9.5, "calib/answer_extract_rate": 0.2109375, "calib/auroc": 0.5340501792114696, "calib/avg_num_step_conf": 0.81640625, "calib/ece": 0.6762499999999998, "calib/final_conf_rate": 0.15625, "calib/format_rate": 0.11328125, "calib/frac_conf_gt_0.9": 0.825, "calib/gap": 0.05516129032258066, "calib/mean_conf": 0.8772500000000001, "calib/mu_c": 0.9199999999999999, "calib/mu_w": 0.8648387096774193, "calib/nonempty_final_conf_rate": 0.15625, "calib/nonempty_reasoning_rate": 0.234375, "calib/nonempty_step_conf_rate": 0.15625, "calib/pce": 0.6642499999999998, "calib/std_conf": 0.24191927889277448, "calib/step_conf_rate": 0.15625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09765625, "completions/max_length": 2984.0, "completions/max_terminated_length": 2984.0, "completions/mean_length": 693.84375, "completions/mean_terminated_length": 768.93505859375, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.017066666666666667, "grad_norm": 0.10435499250888824, "learning_rate": 4.000000000000001e-06, "loss": 0.0697, "num_tokens": 4296234.0, "reward": 0.10126493871212006, "reward_std": 0.21034234762191772, "rewards/accuracy_reward_step": 0.03515625, "rewards/final_brier_reward_step": 0.03787226602435112, "rewards/format_reward_step": 0.11328125, "step": 16 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -3.1079165019320953e-08, "aux_brier/mean_group_std": 0.0960157782281253, "aux_brier/mean_r": 0.49889396337404934, "aux_brier/n_active_tok": 41.54838709677419, "aux_brier/n_groups": 6.709677419354839, "aux_brier/n_step_records": 10.387096774193548, "calib/answer_extract_rate": 0.25390625, "calib/auroc": 0.43181818181818177, "calib/avg_num_step_conf": 1.33984375, "calib/ece": 0.7013529411764705, "calib/final_conf_rate": 0.19921875, "calib/format_rate": 0.16015625, "calib/frac_conf_gt_0.9": 0.8235294117647058, "calib/gap": 0.003252272727272687, "calib/mean_conf": 0.9001764705882351, "calib/mu_c": 0.9027272727272727, "calib/mu_w": 0.899475, "calib/nonempty_final_conf_rate": 0.19921875, "calib/nonempty_reasoning_rate": 0.3203125, "calib/nonempty_step_conf_rate": 0.2421875, "calib/pce": 0.6929215686274509, "calib/std_conf": 0.1827727299651559, "calib/step_conf_rate": 0.2421875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 2843.0, "completions/max_terminated_length": 2843.0, "completions/mean_length": 585.78515625, "completions/mean_terminated_length": 635.427978515625, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.018133333333333335, "grad_norm": 0.7509145140647888, "learning_rate": 4.25e-06, "loss": 0.0676, "num_tokens": 4549723.0, "reward": 0.13649648427963257, "reward_std": 0.23179839551448822, "rewards/accuracy_reward_step": 0.04296875, "rewards/final_brier_reward_step": 0.053798433393239975, "rewards/format_reward_step": 0.16015625, "step": 17 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 8.246359257748002e-08, "aux_brier/mean_group_std": 0.06599999581363429, "aux_brier/mean_r": 0.4889344608586582, "aux_brier/n_active_tok": 37.857142857142854, "aux_brier/n_groups": 7.285714285714286, "aux_brier/n_step_records": 9.464285714285714, "calib/answer_extract_rate": 0.18359375, "calib/auroc": 0.6096774193548388, "calib/avg_num_step_conf": 1.08203125, "calib/ece": 0.5914634146341464, "calib/final_conf_rate": 0.16015625, "calib/format_rate": 0.140625, "calib/frac_conf_gt_0.9": 0.7317073170731707, "calib/gap": 0.13574193548387092, "calib/mean_conf": 0.8353658536585364, "calib/mu_c": 0.938, "calib/mu_w": 0.802258064516129, "calib/nonempty_final_conf_rate": 0.16015625, "calib/nonempty_reasoning_rate": 0.23828125, "calib/nonempty_step_conf_rate": 0.1953125, "calib/pce": 0.5914634146341464, "calib/std_conf": 0.2663859071290262, "calib/step_conf_rate": 0.1953125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.07421875, "completions/max_length": 2874.0, "completions/max_terminated_length": 2874.0, "completions/mean_length": 635.3828125, "completions/mean_terminated_length": 686.3206176757812, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0192, "grad_norm": 0.23033437132835388, "learning_rate": 4.5e-06, "loss": 0.0745, "num_tokens": 4823101.0, "reward": 0.12414237856864929, "reward_std": 0.1830832064151764, "rewards/accuracy_reward_step": 0.0390625, "rewards/final_brier_reward_step": 0.05906952917575836, "rewards/format_reward_step": 0.140625, "step": 18 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -2.1799148291497127e-07, "aux_brier/mean_group_std": 0.12539111681370793, "aux_brier/mean_r": 0.4819700571199433, "aux_brier/n_active_tok": 49.29032258064516, "aux_brier/n_groups": 6.096774193548387, "aux_brier/n_step_records": 12.32258064516129, "calib/answer_extract_rate": 0.30078125, "calib/auroc": 0.48460591133004927, "calib/avg_num_step_conf": 1.55078125, "calib/ece": 0.7018055555555555, "calib/final_conf_rate": 0.28125, "calib/format_rate": 0.24609375, "calib/frac_conf_gt_0.9": 0.75, "calib/gap": 0.041896551724137865, "calib/mean_conf": 0.8812500000000001, "calib/mu_c": 0.9149999999999999, "calib/mu_w": 0.8731034482758621, "calib/nonempty_final_conf_rate": 0.28125, "calib/nonempty_reasoning_rate": 0.390625, "calib/nonempty_step_conf_rate": 0.34765625, "calib/pce": 0.6943055555555555, "calib/std_conf": 0.21670472422169296, "calib/step_conf_rate": 0.34765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.046875, "completions/max_length": 3002.0, "completions/max_terminated_length": 3002.0, "completions/mean_length": 526.578125, "completions/mean_terminated_length": 552.4754028320312, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.020266666666666665, "grad_norm": 0.6649148464202881, "learning_rate": 4.75e-06, "loss": 0.0971, "num_tokens": 5062665.0, "reward": 0.1994456946849823, "reward_std": 0.3087695837020874, "rewards/accuracy_reward_step": 0.0546875, "rewards/final_brier_reward_step": 0.08684530854225159, "rewards/format_reward_step": 0.24609375, "step": 19 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 5.661198010376012e-09, "aux_brier/mean_group_std": 0.18190807637293757, "aux_brier/mean_r": 0.49438430379502507, "aux_brier/n_active_tok": 79.125, "aux_brier/n_groups": 7.40625, "aux_brier/n_step_records": 19.78125, "calib/answer_extract_rate": 0.51953125, "calib/auroc": 0.5978576615831518, "calib/avg_num_step_conf": 2.5234375, "calib/ece": 0.6113913043478262, "calib/final_conf_rate": 0.44921875, "calib/format_rate": 0.40625, "calib/frac_conf_gt_0.9": 0.7565217391304347, "calib/gap": 0.05180101670297743, "calib/mean_conf": 0.9070434782608695, "calib/mu_c": 0.9435294117647058, "calib/mu_w": 0.8917283950617284, "calib/nonempty_final_conf_rate": 0.44921875, "calib/nonempty_reasoning_rate": 0.58984375, "calib/nonempty_step_conf_rate": 0.50390625, "calib/pce": 0.6113913043478262, "calib/std_conf": 0.14471545642063302, "calib/step_conf_rate": 0.50390625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02734375, "completions/max_length": 2971.0, "completions/max_terminated_length": 2971.0, "completions/mean_length": 437.01953125, "completions/mean_terminated_length": 449.3052062988281, "completions/min_length": 0.0, "completions/min_terminated_length": 7.0, "epoch": 0.021333333333333333, "grad_norm": 0.36984461545944214, "learning_rate": 5e-06, "loss": 0.0571, "num_tokens": 5279414.0, "reward": 0.3783619999885559, "reward_std": 0.48967522382736206, "rewards/accuracy_reward_step": 0.1328125, "rewards/final_brier_reward_step": 0.16969804465770721, "rewards/format_reward_step": 0.40625, "step": 20 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -2.3740270443939337e-09, "aux_brier/mean_group_std": 0.21181267982006952, "aux_brier/mean_r": 0.5149232417855898, "aux_brier/n_active_tok": 101.25, "aux_brier/n_groups": 8.78125, "aux_brier/n_step_records": 25.3125, "calib/answer_extract_rate": 0.640625, "calib/auroc": 0.5442108294930876, "calib/avg_num_step_conf": 3.29296875, "calib/ece": 0.7090269736842105, "calib/final_conf_rate": 0.59375, "calib/format_rate": 0.5234375, "calib/frac_conf_gt_0.9": 0.7236842105263158, "calib/gap": 0.03186025345622123, "calib/mean_conf": 0.8822230263157893, "calib/mu_c": 0.9082142857142859, "calib/mu_w": 0.8763540322580646, "calib/nonempty_final_conf_rate": 0.59375, "calib/nonempty_reasoning_rate": 0.734375, "calib/nonempty_step_conf_rate": 0.6328125, "calib/pce": 0.7035197368421053, "calib/std_conf": 0.19938412492578328, "calib/step_conf_rate": 0.6328125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2691.0, "completions/max_terminated_length": 2691.0, "completions/mean_length": 443.609375, "completions/mean_terminated_length": 447.10235595703125, "completions/min_length": 0.0, "completions/min_terminated_length": 5.0, "epoch": 0.0224, "grad_norm": 0.6879671216011047, "learning_rate": 4.9722222222222224e-06, "loss": 0.1353, "num_tokens": 5495938.0, "reward": 0.41851744055747986, "reward_std": 0.4342387318611145, "rewards/accuracy_reward_step": 0.11328125, "rewards/final_brier_reward_step": 0.17406976222991943, "rewards/format_reward_step": 0.5234375, "step": 21 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 9.913477306416896e-08, "aux_brier/mean_group_std": 0.19722453778596868, "aux_brier/mean_r": 0.5062000081744715, "aux_brier/n_active_tok": 104.0, "aux_brier/n_groups": 7.78125, "aux_brier/n_step_records": 26.0, "calib/answer_extract_rate": 0.6953125, "calib/auroc": 0.49372206852521816, "calib/avg_num_step_conf": 3.26953125, "calib/ece": 0.6656463414634147, "calib/final_conf_rate": 0.640625, "calib/format_rate": 0.5625, "calib/frac_conf_gt_0.9": 0.725609756097561, "calib/gap": 0.007945945945945776, "calib/mean_conf": 0.8897926829268293, "calib/mu_c": 0.8959459459459459, "calib/mu_w": 0.8880000000000001, "calib/nonempty_final_conf_rate": 0.640625, "calib/nonempty_reasoning_rate": 0.765625, "calib/nonempty_step_conf_rate": 0.6640625, "calib/pce": 0.6649146341463416, "calib/std_conf": 0.16682525089657244, "calib/step_conf_rate": 0.6640625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2846.0, "completions/max_terminated_length": 2846.0, "completions/mean_length": 386.79296875, "completions/mean_terminated_length": 392.93255615234375, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.023466666666666667, "grad_norm": 0.3435189723968506, "learning_rate": 4.944444444444445e-06, "loss": 0.2041, "num_tokens": 5696773.0, "reward": 0.48116904497146606, "reward_std": 0.45967790484428406, "rewards/accuracy_reward_step": 0.1484375, "rewards/final_brier_reward_step": 0.20592617988586426, "rewards/format_reward_step": 0.5625, "step": 22 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 1.6693964287328722e-08, "aux_brier/mean_group_std": 0.19466317894734803, "aux_brier/mean_r": 0.46551549790218716, "aux_brier/n_active_tok": 111.125, "aux_brier/n_groups": 7.8125, "aux_brier/n_step_records": 27.78125, "calib/answer_extract_rate": 0.74609375, "calib/auroc": 0.488888888888889, "calib/avg_num_step_conf": 3.51171875, "calib/ece": 0.6667318435754189, "calib/final_conf_rate": 0.69921875, "calib/format_rate": 0.62890625, "calib/frac_conf_gt_0.9": 0.7486033519553073, "calib/gap": 0.01290235690235686, "calib/mean_conf": 0.9125418994413406, "calib/mu_c": 0.9222727272727272, "calib/mu_w": 0.9093703703703704, "calib/nonempty_final_conf_rate": 0.69921875, "calib/nonempty_reasoning_rate": 0.80078125, "calib/nonempty_step_conf_rate": 0.69921875, "calib/pce": 0.6667318435754189, "calib/std_conf": 0.12324224307871748, "calib/step_conf_rate": 0.69921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2747.0, "completions/max_terminated_length": 2747.0, "completions/mean_length": 365.08203125, "completions/mean_terminated_length": 370.87701416015625, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.024533333333333334, "grad_norm": 0.3371759355068207, "learning_rate": 4.9166666666666665e-06, "loss": 0.1058, "num_tokens": 5894170.0, "reward": 0.5453325510025024, "reward_std": 0.5091565251350403, "rewards/accuracy_reward_step": 0.171875, "rewards/final_brier_reward_step": 0.23601758480072021, "rewards/format_reward_step": 0.62890625, "step": 23 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -6.6316448651959625e-09, "aux_brier/mean_group_std": 0.20653811194818572, "aux_brier/mean_r": 0.4973872652994401, "aux_brier/n_active_tok": 134.5, "aux_brier/n_groups": 9.03125, "aux_brier/n_step_records": 33.625, "calib/answer_extract_rate": 0.84765625, "calib/auroc": 0.5679466230936819, "calib/avg_num_step_conf": 4.30078125, "calib/ece": 0.6650044776119403, "calib/final_conf_rate": 0.78515625, "calib/format_rate": 0.734375, "calib/frac_conf_gt_0.9": 0.7412935323383084, "calib/gap": 0.04283562091503279, "calib/mean_conf": 0.9038104477611941, "calib/mu_c": 0.9364166666666667, "calib/mu_w": 0.8935810457516339, "calib/nonempty_final_conf_rate": 0.78515625, "calib/nonempty_reasoning_rate": 0.91015625, "calib/nonempty_step_conf_rate": 0.8203125, "calib/pce": 0.6650044776119403, "calib/std_conf": 0.16970596737734678, "calib/step_conf_rate": 0.8203125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 1742.0, "completions/max_terminated_length": 1742.0, "completions/mean_length": 334.46875, "completions/mean_terminated_length": 341.1314697265625, "completions/min_length": 0.0, "completions/min_terminated_length": 11.0, "epoch": 0.0256, "grad_norm": 1.9992388486862183, "learning_rate": 4.888888888888889e-06, "loss": 0.0465, "num_tokens": 6084306.0, "reward": 0.6237559914588928, "reward_std": 0.47158321738243103, "rewards/accuracy_reward_step": 0.1875, "rewards/final_brier_reward_step": 0.2762739658355713, "rewards/format_reward_step": 0.734375, "step": 24 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -1.9139240043514194e-08, "aux_brier/mean_group_std": 0.20935374056120784, "aux_brier/mean_r": 0.505291674313617, "aux_brier/n_active_tok": 132.875, "aux_brier/n_groups": 8.1875, "aux_brier/n_step_records": 33.21875, "calib/answer_extract_rate": 0.8984375, "calib/auroc": 0.5071524577572964, "calib/avg_num_step_conf": 4.203125, "calib/ece": 0.6408339130434781, "calib/final_conf_rate": 0.8984375, "calib/format_rate": 0.8203125, "calib/frac_conf_gt_0.9": 0.7086956521739131, "calib/gap": -0.012659600614439381, "calib/mean_conf": 0.8769921739130435, "calib/mu_c": 0.8677451612903226, "calib/mu_w": 0.880404761904762, "calib/nonempty_final_conf_rate": 0.8984375, "calib/nonempty_reasoning_rate": 0.953125, "calib/nonempty_step_conf_rate": 0.88671875, "calib/pce": 0.6241304347826085, "calib/std_conf": 0.20299148864051295, "calib/step_conf_rate": 0.88671875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2546.0, "completions/max_terminated_length": 2546.0, "completions/mean_length": 291.7265625, "completions/mean_terminated_length": 295.185791015625, "completions/min_length": 0.0, "completions/min_terminated_length": 26.0, "epoch": 0.02666666666666667, "grad_norm": 0.3488129675388336, "learning_rate": 4.861111111111111e-06, "loss": 0.0202, "num_tokens": 6262212.0, "reward": 0.7352092862129211, "reward_std": 0.4682455062866211, "rewards/accuracy_reward_step": 0.24609375, "rewards/final_brier_reward_step": 0.31583714485168457, "rewards/format_reward_step": 0.8203125, "step": 25 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 3.5249401730830243e-09, "aux_brier/mean_group_std": 0.2004840616017761, "aux_brier/mean_r": 0.5115577585789893, "aux_brier/n_active_tok": 152.25, "aux_brier/n_groups": 8.8125, "aux_brier/n_step_records": 38.0625, "calib/answer_extract_rate": 0.8671875, "calib/auroc": 0.483560545308741, "calib/avg_num_step_conf": 4.81640625, "calib/ece": 0.700069585253456, "calib/final_conf_rate": 0.84765625, "calib/format_rate": 0.7890625, "calib/frac_conf_gt_0.9": 0.7603686635944701, "calib/gap": 0.019425380914194323, "calib/mean_conf": 0.8869820276497697, "calib/mu_c": 0.902558139534884, "calib/mu_w": 0.8831327586206896, "calib/nonempty_final_conf_rate": 0.84765625, "calib/nonempty_reasoning_rate": 0.9609375, "calib/nonempty_step_conf_rate": 0.9140625, "calib/pce": 0.6944474654377878, "calib/std_conf": 0.20601633575678802, "calib/step_conf_rate": 0.9140625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1818.0, "completions/max_terminated_length": 1818.0, "completions/mean_length": 303.15234375, "completions/mean_terminated_length": 304.3411865234375, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.027733333333333332, "grad_norm": 1.5694950819015503, "learning_rate": 4.833333333333333e-06, "loss": 0.0266, "num_tokens": 6445059.0, "reward": 0.6304481029510498, "reward_std": 0.4700429439544678, "rewards/accuracy_reward_step": 0.171875, "rewards/final_brier_reward_step": 0.256167471408844, "rewards/format_reward_step": 0.7890625, "step": 26 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 6.1927870401157215e-09, "aux_brier/mean_group_std": 0.19561626236250582, "aux_brier/mean_r": 0.5135547247653034, "aux_brier/n_active_tok": 148.625, "aux_brier/n_groups": 8.625, "aux_brier/n_step_records": 37.15625, "calib/answer_extract_rate": 0.90625, "calib/auroc": 0.5291819291819292, "calib/avg_num_step_conf": 4.734375, "calib/ece": 0.677325697503671, "calib/final_conf_rate": 0.88671875, "calib/format_rate": 0.83203125, "calib/frac_conf_gt_0.9": 0.6475770925110133, "calib/gap": 0.023012454212454014, "calib/mean_conf": 0.8675494860499267, "calib/mu_c": 0.8859999999999999, "calib/mu_w": 0.8629875457875459, "calib/nonempty_final_conf_rate": 0.88671875, "calib/nonempty_reasoning_rate": 0.96484375, "calib/nonempty_step_conf_rate": 0.9140625, "calib/pce": 0.6733186490455212, "calib/std_conf": 0.2010847250794651, "calib/step_conf_rate": 0.9140625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2712.0, "completions/max_terminated_length": 2712.0, "completions/mean_length": 308.921875, "completions/mean_terminated_length": 312.5849914550781, "completions/min_length": 0.0, "completions/min_terminated_length": 34.0, "epoch": 0.0288, "grad_norm": 4.943506717681885, "learning_rate": 4.805555555555556e-06, "loss": 0.0659, "num_tokens": 6629359.0, "reward": 0.6669065952301025, "reward_std": 0.42902442812919617, "rewards/accuracy_reward_step": 0.17578125, "rewards/final_brier_reward_step": 0.3004390001296997, "rewards/format_reward_step": 0.83203125, "step": 27 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -1.2140759217404984e-08, "aux_brier/mean_group_std": 0.21330967080190893, "aux_brier/mean_r": 0.5095590760513937, "aux_brier/n_active_tok": 161.25, "aux_brier/n_groups": 9.59375, "aux_brier/n_step_records": 40.3125, "calib/answer_extract_rate": 0.89453125, "calib/auroc": 0.4992000752870318, "calib/avg_num_step_conf": 5.07421875, "calib/ece": 0.6084577092511015, "calib/final_conf_rate": 0.88671875, "calib/format_rate": 0.84375, "calib/frac_conf_gt_0.9": 0.7180616740088106, "calib/gap": 0.012993920572181183, "calib/mean_conf": 0.8877537444933922, "calib/mu_c": 0.8969696969696969, "calib/mu_w": 0.8839757763975157, "calib/nonempty_final_conf_rate": 0.88671875, "calib/nonempty_reasoning_rate": 0.96484375, "calib/nonempty_step_conf_rate": 0.9453125, "calib/pce": 0.6027312775330398, "calib/std_conf": 0.18186719563245113, "calib/step_conf_rate": 0.9453125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2578.0, "completions/max_terminated_length": 2578.0, "completions/mean_length": 318.5625, "completions/mean_terminated_length": 322.3399353027344, "completions/min_length": 0.0, "completions/min_terminated_length": 60.0, "epoch": 0.029866666666666666, "grad_norm": 3.2336666584014893, "learning_rate": 4.777777777777778e-06, "loss": 0.049, "num_tokens": 6817855.0, "reward": 0.770412802696228, "reward_std": 0.4679546654224396, "rewards/accuracy_reward_step": 0.26171875, "rewards/final_brier_reward_step": 0.3472761809825897, "rewards/format_reward_step": 0.84375, "step": 28 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -3.2023973334238676e-09, "aux_brier/mean_group_std": 0.22325800543385368, "aux_brier/mean_r": 0.5106411767447425, "aux_brier/n_active_tok": 180.25, "aux_brier/n_groups": 10.875, "aux_brier/n_step_records": 45.0625, "calib/answer_extract_rate": 0.921875, "calib/auroc": 0.5476190476190476, "calib/avg_num_step_conf": 5.75390625, "calib/ece": 0.7278418803418804, "calib/final_conf_rate": 0.9140625, "calib/format_rate": 0.8671875, "calib/frac_conf_gt_0.9": 0.7777777777777778, "calib/gap": 0.04591145833333332, "calib/mean_conf": 0.9073290598290599, "calib/mu_c": 0.945, "calib/mu_w": 0.8990885416666666, "calib/nonempty_final_conf_rate": 0.9140625, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.96875, "calib/pce": 0.7278418803418804, "calib/std_conf": 0.16810984990225533, "calib/step_conf_rate": 0.96875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2094.0, "completions/max_terminated_length": 2094.0, "completions/mean_length": 341.28515625, "completions/mean_terminated_length": 341.28515625, "completions/min_length": 35.0, "completions/min_terminated_length": 35.0, "epoch": 0.030933333333333334, "grad_norm": 7.195005893707275, "learning_rate": 4.75e-06, "loss": 0.0357, "num_tokens": 7012352.0, "reward": 0.6658509373664856, "reward_std": 0.4067962169647217, "rewards/accuracy_reward_step": 0.1640625, "rewards/final_brier_reward_step": 0.2727789282798767, "rewards/format_reward_step": 0.8671875, "step": 29 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 5.267420791210675e-09, "aux_brier/mean_group_std": 0.21119422611148528, "aux_brier/mean_r": 0.5397748448564954, "aux_brier/n_active_tok": 170.25, "aux_brier/n_groups": 9.6875, "aux_brier/n_step_records": 42.5625, "calib/answer_extract_rate": 0.9375, "calib/auroc": 0.5150462962962963, "calib/avg_num_step_conf": 5.3984375, "calib/ece": 0.6678237693389593, "calib/final_conf_rate": 0.92578125, "calib/format_rate": 0.890625, "calib/frac_conf_gt_0.9": 0.6286919831223629, "calib/gap": 0.05370184082892415, "calib/mean_conf": 0.865714064697609, "calib/mu_c": 0.9085395833333333, "calib/mu_w": 0.8548377425044091, "calib/nonempty_final_conf_rate": 0.92578125, "calib/nonempty_reasoning_rate": 0.98046875, "calib/nonempty_step_conf_rate": 0.9609375, "calib/pce": 0.665503094233474, "calib/std_conf": 0.21664601599961172, "calib/step_conf_rate": 0.9609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3028.0, "completions/max_terminated_length": 3028.0, "completions/mean_length": 345.0859375, "completions/mean_terminated_length": 346.4392395019531, "completions/min_length": 0.0, "completions/min_terminated_length": 47.0, "epoch": 0.032, "grad_norm": 0.8084984421730042, "learning_rate": 4.722222222222222e-06, "loss": 0.0394, "num_tokens": 7207678.0, "reward": 0.7224695682525635, "reward_std": 0.42729613184928894, "rewards/accuracy_reward_step": 0.1953125, "rewards/final_brier_reward_step": 0.3273780047893524, "rewards/format_reward_step": 0.890625, "step": 30 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 1.4573048948141043e-08, "aux_brier/mean_group_std": 0.19195187264691851, "aux_brier/mean_r": 0.5383719147935322, "aux_brier/n_active_tok": 179.0, "aux_brier/n_groups": 10.125, "aux_brier/n_step_records": 44.75, "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.4319251456745854, "calib/avg_num_step_conf": 5.69921875, "calib/ece": 0.68948625, "calib/final_conf_rate": 0.9375, "calib/format_rate": 0.921875, "calib/frac_conf_gt_0.9": 0.6666666666666666, "calib/gap": 0.005395764231286182, "calib/mean_conf": 0.87448625, "calib/mu_c": 0.8788478260869563, "calib/mu_w": 0.8734520618556701, "calib/nonempty_final_conf_rate": 0.9375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.6861529166666667, "calib/std_conf": 0.195830454141512, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2112.0, "completions/max_terminated_length": 2112.0, "completions/mean_length": 319.078125, "completions/mean_terminated_length": 320.3294372558594, "completions/min_length": 0.0, "completions/min_terminated_length": 54.0, "epoch": 0.03306666666666667, "grad_norm": 0.4686136841773987, "learning_rate": 4.694444444444445e-06, "loss": 0.0378, "num_tokens": 7395274.0, "reward": 0.7197814583778381, "reward_std": 0.38697105646133423, "rewards/accuracy_reward_step": 0.1796875, "rewards/final_brier_reward_step": 0.31662583351135254, "rewards/format_reward_step": 0.921875, "step": 31 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -3.3076910377349833e-09, "aux_brier/mean_group_std": 0.23219807412544136, "aux_brier/mean_r": 0.6105224431929146, "aux_brier/n_active_tok": 180.875, "aux_brier/n_groups": 9.5625, "aux_brier/n_step_records": 45.21875, "calib/answer_extract_rate": 0.95703125, "calib/auroc": 0.4489905498281787, "calib/avg_num_step_conf": 5.6796875, "calib/ece": 0.6518677685950413, "calib/final_conf_rate": 0.9453125, "calib/format_rate": 0.8984375, "calib/frac_conf_gt_0.9": 0.5909090909090909, "calib/gap": -0.021713487972508716, "calib/mean_conf": 0.842198347107438, "calib/mu_c": 0.8247916666666666, "calib/mu_w": 0.8465051546391753, "calib/nonempty_final_conf_rate": 0.9453125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.96484375, "calib/pce": 0.6478595041322314, "calib/std_conf": 0.20719590361927231, "calib/step_conf_rate": 0.96484375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1583.0, "completions/max_terminated_length": 1583.0, "completions/mean_length": 307.84765625, "completions/mean_terminated_length": 309.054931640625, "completions/min_length": 0.0, "completions/min_terminated_length": 29.0, "epoch": 0.034133333333333335, "grad_norm": 0.2770783305168152, "learning_rate": 4.666666666666667e-06, "loss": 0.0358, "num_tokens": 7580787.0, "reward": 0.7233457565307617, "reward_std": 0.33742696046829224, "rewards/accuracy_reward_step": 0.1875, "rewards/final_brier_reward_step": 0.34650811553001404, "rewards/format_reward_step": 0.8984375, "step": 32 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -8.45548617234293e-09, "aux_brier/mean_group_std": 0.2160932896811416, "aux_brier/mean_r": 0.5734913379692018, "aux_brier/n_active_tok": 188.5, "aux_brier/n_groups": 10.15625, "aux_brier/n_step_records": 47.125, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.5422324387896931, "calib/avg_num_step_conf": 5.953125, "calib/ece": 0.6710995934959348, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.921875, "calib/frac_conf_gt_0.9": 0.6382113821138211, "calib/gap": 0.01798636801026432, "calib/mean_conf": 0.8490670731707317, "calib/mu_c": 0.8636170212765958, "calib/mu_w": 0.8456306532663315, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.6645548780487804, "calib/std_conf": 0.2214576263245123, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1345.0, "completions/max_terminated_length": 1345.0, "completions/mean_length": 339.8515625, "completions/mean_terminated_length": 341.184326171875, "completions/min_length": 0.0, "completions/min_terminated_length": 100.0, "epoch": 0.0352, "grad_norm": 2.2709271907806396, "learning_rate": 4.638888888888889e-06, "loss": 0.0367, "num_tokens": 7774661.0, "reward": 0.735588788986206, "reward_std": 0.37076908349990845, "rewards/accuracy_reward_step": 0.1875, "rewards/final_brier_reward_step": 0.3486052453517914, "rewards/format_reward_step": 0.921875, "step": 33 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -1.514862030260744e-08, "aux_brier/mean_group_std": 0.22333645302988725, "aux_brier/mean_r": 0.6118464752477848, "aux_brier/n_active_tok": 177.875, "aux_brier/n_groups": 9.40625, "aux_brier/n_step_records": 44.46875, "calib/answer_extract_rate": 0.93359375, "calib/auroc": 0.49775705347656707, "calib/avg_num_step_conf": 5.62109375, "calib/ece": 0.6464166666666666, "calib/final_conf_rate": 0.9375, "calib/format_rate": 0.90234375, "calib/frac_conf_gt_0.9": 0.5541666666666667, "calib/gap": 0.0071561799079213895, "calib/mean_conf": 0.8169166666666666, "calib/mu_c": 0.8227906976744187, "calib/mu_w": 0.8156345177664973, "calib/nonempty_final_conf_rate": 0.9375, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.6420833333333333, "calib/std_conf": 0.23541203535267452, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1946.0, "completions/max_terminated_length": 1946.0, "completions/mean_length": 308.40234375, "completions/mean_terminated_length": 308.40234375, "completions/min_length": 34.0, "completions/min_terminated_length": 34.0, "epoch": 0.03626666666666667, "grad_norm": 6.610110759735107, "learning_rate": 4.611111111111112e-06, "loss": 0.0728, "num_tokens": 7958724.0, "reward": 0.7139061689376831, "reward_std": 0.3879733085632324, "rewards/accuracy_reward_step": 0.171875, "rewards/final_brier_reward_step": 0.36343711614608765, "rewards/format_reward_step": 0.90234375, "step": 34 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 2.088691583579383e-08, "aux_brier/mean_group_std": 0.2220033179775186, "aux_brier/mean_r": 0.6694873177641538, "aux_brier/n_active_tok": 208.25, "aux_brier/n_groups": 11.75, "aux_brier/n_step_records": 52.0625, "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.4580443828016643, "calib/avg_num_step_conf": 6.63671875, "calib/ece": 0.6261298387096774, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.93359375, "calib/frac_conf_gt_0.9": 0.5, "calib/gap": 0.005149283402681504, "calib/mean_conf": 0.7954846774193549, "calib/mu_c": 0.7997619047619047, "calib/mu_w": 0.7946126213592232, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.6261298387096774, "calib/std_conf": 0.24226275210098042, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1544.0, "completions/max_terminated_length": 1544.0, "completions/mean_length": 342.21484375, "completions/mean_terminated_length": 343.556884765625, "completions/min_length": 0.0, "completions/min_terminated_length": 96.0, "epoch": 0.037333333333333336, "grad_norm": 2.5186874866485596, "learning_rate": 4.583333333333333e-06, "loss": 0.0205, "num_tokens": 8155587.0, "reward": 0.7271343469619751, "reward_std": 0.38158851861953735, "rewards/accuracy_reward_step": 0.1640625, "rewards/final_brier_reward_step": 0.38509997725486755, "rewards/format_reward_step": 0.93359375, "step": 35 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 6.169562194457967e-09, "aux_brier/mean_group_std": 0.2203165829964861, "aux_brier/mean_r": 0.7090527466461379, "aux_brier/n_active_tok": 189.625, "aux_brier/n_groups": 10.28125, "aux_brier/n_step_records": 47.40625, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.4370542635658915, "calib/avg_num_step_conf": 5.97265625, "calib/ece": 0.482085020242915, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.9453125, "calib/frac_conf_gt_0.9": 0.4089068825910931, "calib/gap": -0.02508565891472858, "calib/mean_conf": 0.7729352226720648, "calib/mu_c": 0.7554666666666667, "calib/mu_w": 0.7805523255813953, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.4756882591093118, "calib/std_conf": 0.24132308180527398, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1496.0, "completions/max_terminated_length": 1496.0, "completions/mean_length": 313.98046875, "completions/mean_terminated_length": 313.98046875, "completions/min_length": 58.0, "completions/min_terminated_length": 58.0, "epoch": 0.0384, "grad_norm": 1.0801153182983398, "learning_rate": 4.555555555555556e-06, "loss": 0.0461, "num_tokens": 8338678.0, "reward": 0.8887618184089661, "reward_std": 0.43464434146881104, "rewards/accuracy_reward_step": 0.296875, "rewards/final_brier_reward_step": 0.4769221842288971, "rewards/format_reward_step": 0.9453125, "step": 36 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -8.800347457704749e-09, "aux_brier/mean_group_std": 0.1925654434566419, "aux_brier/mean_r": 0.7558997547162454, "aux_brier/n_active_tok": 197.125, "aux_brier/n_groups": 11.15625, "aux_brier/n_step_records": 49.28125, "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.4782876501385894, "calib/avg_num_step_conf": 6.234375, "calib/ece": 0.508456611570248, "calib/final_conf_rate": 0.9453125, "calib/format_rate": 0.9140625, "calib/frac_conf_gt_0.9": 0.33884297520661155, "calib/gap": 0.009708294836259235, "calib/mean_conf": 0.7192004132231404, "calib/mu_c": 0.7268627450980393, "calib/mu_w": 0.71715445026178, "calib/nonempty_final_conf_rate": 0.9453125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.508456611570248, "calib/std_conf": 0.25263505945195347, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2816.0, "completions/max_terminated_length": 2816.0, "completions/mean_length": 330.3359375, "completions/mean_terminated_length": 331.6313781738281, "completions/min_length": 0.0, "completions/min_terminated_length": 17.0, "epoch": 0.039466666666666664, "grad_norm": 0.6208831071853638, "learning_rate": 4.527777777777778e-06, "loss": 0.0345, "num_tokens": 8530340.0, "reward": 0.782370924949646, "reward_std": 0.41898417472839355, "rewards/accuracy_reward_step": 0.20703125, "rewards/final_brier_reward_step": 0.4732334613800049, "rewards/format_reward_step": 0.9140625, "step": 37 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -3.332314524318036e-08, "aux_brier/mean_group_std": 0.17843746533874077, "aux_brier/mean_r": 0.7999370721329889, "aux_brier/n_active_tok": 207.75, "aux_brier/n_groups": 12.4375, "aux_brier/n_step_records": 51.9375, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5330945208825206, "calib/avg_num_step_conf": 6.6015625, "calib/ece": 0.40371311475409843, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.2827868852459016, "calib/gap": 0.028847512822600452, "calib/mean_conf": 0.6736311475409835, "calib/mu_c": 0.6940845070422537, "calib/mu_w": 0.6652369942196532, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.3931803278688525, "calib/std_conf": 0.277502464937204, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2271.0, "completions/max_terminated_length": 2271.0, "completions/mean_length": 350.91796875, "completions/mean_terminated_length": 352.29412841796875, "completions/min_length": 0.0, "completions/min_terminated_length": 116.0, "epoch": 0.04053333333333333, "grad_norm": 0.953909158706665, "learning_rate": 4.5e-06, "loss": 0.0602, "num_tokens": 8727063.0, "reward": 0.8888434171676636, "reward_std": 0.40952152013778687, "rewards/accuracy_reward_step": 0.28125, "rewards/final_brier_reward_step": 0.5475612878799438, "rewards/format_reward_step": 0.94140625, "step": 38 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -1.5018593230164967e-08, "aux_brier/mean_group_std": 0.15156664724203292, "aux_brier/mean_r": 0.810969457909356, "aux_brier/n_active_tok": 192.75, "aux_brier/n_groups": 9.90625, "aux_brier/n_step_records": 48.1875, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.4874641148325359, "calib/avg_num_step_conf": 6.1484375, "calib/ece": 0.42951183673469384, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.24081632653061225, "calib/gap": -0.0030888038277511187, "calib/mean_conf": 0.6313044897959184, "calib/mu_c": 0.628909090909091, "calib/mu_w": 0.6319978947368421, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.4181632653061224, "calib/std_conf": 0.2752871157578769, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1205.0, "completions/max_terminated_length": 1205.0, "completions/mean_length": 329.890625, "completions/mean_terminated_length": 331.184326171875, "completions/min_length": 0.0, "completions/min_terminated_length": 105.0, "epoch": 0.0416, "grad_norm": 0.23614369332790375, "learning_rate": 4.472222222222223e-06, "loss": 0.0236, "num_tokens": 8917603.0, "reward": 0.8228167295455933, "reward_std": 0.32237982749938965, "rewards/accuracy_reward_step": 0.21484375, "rewards/final_brier_reward_step": 0.5490795373916626, "rewards/format_reward_step": 0.94140625, "step": 39 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 4.045960014242844e-08, "aux_brier/mean_group_std": 0.13373408419202218, "aux_brier/mean_r": 0.8406878158306899, "aux_brier/n_active_tok": 215.625, "aux_brier/n_groups": 13.15625, "aux_brier/n_step_records": 53.90625, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5196078431372548, "calib/avg_num_step_conf": 6.77734375, "calib/ece": 0.4654435483870968, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.9453125, "calib/frac_conf_gt_0.9": 0.27419354838709675, "calib/gap": 0.015691251119737126, "calib/mean_conf": 0.6616532258064516, "calib/mu_c": 0.6741176470588235, "calib/mu_w": 0.6584263959390864, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.4607258064516129, "calib/std_conf": 0.2739056096056909, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2065.0, "completions/max_terminated_length": 2065.0, "completions/mean_length": 357.421875, "completions/mean_terminated_length": 357.421875, "completions/min_length": 11.0, "completions/min_terminated_length": 11.0, "epoch": 0.042666666666666665, "grad_norm": 0.7531865835189819, "learning_rate": 4.444444444444444e-06, "loss": 0.0167, "num_tokens": 9115863.0, "reward": 0.8074332475662231, "reward_std": 0.3995877206325531, "rewards/accuracy_reward_step": 0.203125, "rewards/final_brier_reward_step": 0.5266082286834717, "rewards/format_reward_step": 0.9453125, "step": 40 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 7.874983145098047e-08, "aux_brier/mean_group_std": 0.1312267855764217, "aux_brier/mean_r": 0.8464493463466005, "aux_brier/n_active_tok": 202.125, "aux_brier/n_groups": 11.8125, "aux_brier/n_step_records": 50.53125, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.4947436245252306, "calib/avg_num_step_conf": 6.36328125, "calib/ece": 0.30325301204819277, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.1646586345381526, "calib/gap": -0.005603635377102734, "calib/mean_conf": 0.5608433734939758, "calib/mu_c": 0.5574226804123711, "calib/mu_w": 0.5630263157894738, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.2372690763052209, "calib/std_conf": 0.29027146200216886, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1700.0, "completions/max_terminated_length": 1700.0, "completions/mean_length": 329.859375, "completions/mean_terminated_length": 331.1529541015625, "completions/min_length": 0.0, "completions/min_terminated_length": 64.0, "epoch": 0.04373333333333333, "grad_norm": 0.32473674416542053, "learning_rate": 4.416666666666667e-06, "loss": 0.0376, "num_tokens": 9307555.0, "reward": 1.0193958282470703, "reward_std": 0.39183253049850464, "rewards/accuracy_reward_step": 0.3828125, "rewards/final_brier_reward_step": 0.6244581937789917, "rewards/format_reward_step": 0.9609375, "step": 41 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 2.636152155316429e-07, "aux_brier/mean_group_std": 0.11594292246522589, "aux_brier/mean_r": 0.8885825685236638, "aux_brier/n_active_tok": 182.75, "aux_brier/n_groups": 9.9375, "aux_brier/n_step_records": 45.6875, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.4811442989634479, "calib/avg_num_step_conf": 5.7734375, "calib/ece": 0.279268, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.172, "calib/gap": -0.024231178396071917, "calib/mean_conf": 0.5247159999999998, "calib/mu_c": 0.5095957446808511, "calib/mu_w": 0.533826923076923, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.21399200000000002, "calib/std_conf": 0.30292278115717874, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1987.0, "completions/max_terminated_length": 1987.0, "completions/mean_length": 292.33203125, "completions/mean_terminated_length": 292.33203125, "completions/min_length": 73.0, "completions/min_terminated_length": 73.0, "epoch": 0.0448, "grad_norm": 0.9810383915901184, "learning_rate": 4.388888888888889e-06, "loss": 0.0275, "num_tokens": 9486760.0, "reward": 0.9959656000137329, "reward_std": 0.3806919455528259, "rewards/accuracy_reward_step": 0.3671875, "rewards/final_brier_reward_step": 0.6166751384735107, "rewards/format_reward_step": 0.94921875, "step": 42 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 1.5842941802207688e-07, "aux_brier/mean_group_std": 0.09925830655635783, "aux_brier/mean_r": 0.895804819442369, "aux_brier/n_active_tok": 201.0, "aux_brier/n_groups": 13.15625, "aux_brier/n_step_records": 50.25, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.48248781368478066, "calib/avg_num_step_conf": 6.37890625, "calib/ece": 0.32383132530120484, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.1686746987951807, "calib/gap": -0.024683336342300022, "calib/mean_conf": 0.517726907630522, "calib/mu_c": 0.4987931034482759, "calib/mu_w": 0.5234764397905759, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.30431325301204826, "calib/std_conf": 0.2982098492263088, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2567.0, "completions/max_terminated_length": 2567.0, "completions/mean_length": 347.87890625, "completions/mean_terminated_length": 347.87890625, "completions/min_length": 79.0, "completions/min_terminated_length": 79.0, "epoch": 0.04586666666666667, "grad_norm": 0.764629602432251, "learning_rate": 4.361111111111112e-06, "loss": 0.0761, "num_tokens": 9681041.0, "reward": 0.8540087938308716, "reward_std": 0.35527944564819336, "rewards/accuracy_reward_step": 0.2265625, "rewards/final_brier_reward_step": 0.6113476753234863, "rewards/format_reward_step": 0.94921875, "step": 43 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 7.65690757575932e-07, "aux_brier/mean_group_std": 0.0811326167114847, "aux_brier/mean_r": 0.920624391641562, "aux_brier/n_active_tok": 215.25, "aux_brier/n_groups": 13.03125, "aux_brier/n_step_records": 53.8125, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.4535250631085468, "calib/avg_num_step_conf": 6.765625, "calib/ece": 0.2585951417004048, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.08502024291497975, "calib/gap": -0.06511206274792636, "calib/mean_conf": 0.4246437246963562, "calib/mu_c": 0.3750847457627119, "calib/mu_w": 0.44019680851063825, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.22218623481781374, "calib/std_conf": 0.2849744831802655, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2513.0, "completions/max_terminated_length": 2513.0, "completions/mean_length": 345.7109375, "completions/mean_terminated_length": 345.7109375, "completions/min_length": 76.0, "completions/min_terminated_length": 76.0, "epoch": 0.046933333333333334, "grad_norm": 0.33147311210632324, "learning_rate": 4.333333333333334e-06, "loss": 0.0725, "num_tokens": 9875863.0, "reward": 0.8748396635055542, "reward_std": 0.3271452784538269, "rewards/accuracy_reward_step": 0.234375, "rewards/final_brier_reward_step": 0.647796094417572, "rewards/format_reward_step": 0.95703125, "step": 44 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -3.765974979086373e-07, "aux_brier/mean_group_std": 0.08300107892415177, "aux_brier/mean_r": 0.9306819157320256, "aux_brier/n_active_tok": 203.25, "aux_brier/n_groups": 11.65625, "aux_brier/n_step_records": 50.8125, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.44570881226053644, "calib/avg_num_step_conf": 6.41796875, "calib/ece": 0.27049397590361446, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0642570281124498, "calib/gap": -0.046901149425287336, "calib/mean_conf": 0.37124096385542166, "calib/mu_c": 0.3384666666666667, "calib/mu_w": 0.38536781609195403, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.17026506024096388, "calib/std_conf": 0.28194382321033484, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2254.0, "completions/max_terminated_length": 2254.0, "completions/mean_length": 330.65234375, "completions/mean_terminated_length": 331.94903564453125, "completions/min_length": 0.0, "completions/min_terminated_length": 99.0, "epoch": 0.048, "grad_norm": 0.18330220878124237, "learning_rate": 4.305555555555556e-06, "loss": 0.0437, "num_tokens": 10065558.0, "reward": 0.9469988346099854, "reward_std": 0.3695826530456543, "rewards/accuracy_reward_step": 0.296875, "rewards/final_brier_reward_step": 0.6629955768585205, "rewards/format_reward_step": 0.96875, "step": 45 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -2.4409363044361476e-07, "aux_brier/mean_group_std": 0.08869492678138623, "aux_brier/mean_r": 0.9119198388583334, "aux_brier/n_active_tok": 225.75, "aux_brier/n_groups": 15.34375, "aux_brier/n_step_records": 56.4375, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4519160951996772, "calib/avg_num_step_conf": 7.08203125, "calib/ece": 0.2646785714285714, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.10317460317460317, "calib/gap": -0.0675923356192013, "calib/mean_conf": 0.3448452380952381, "calib/mu_c": 0.295223880597015, "calib/mu_w": 0.3628162162162163, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.17182539682539683, "calib/std_conf": 0.29150665654605146, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2304.0, "completions/max_terminated_length": 2304.0, "completions/mean_length": 347.5, "completions/mean_terminated_length": 347.5, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 0.04906666666666667, "grad_norm": 0.10056949406862259, "learning_rate": 4.277777777777778e-06, "loss": 0.0901, "num_tokens": 10259286.0, "reward": 0.9220494627952576, "reward_std": 0.24605713784694672, "rewards/accuracy_reward_step": 0.265625, "rewards/final_brier_reward_step": 0.6725727319717407, "rewards/format_reward_step": 0.9765625, "step": 46 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 9.879659923495154e-08, "aux_brier/mean_group_std": 0.07213828201589893, "aux_brier/mean_r": 0.9364409053049382, "aux_brier/n_active_tok": 231.0, "aux_brier/n_groups": 14.78125, "aux_brier/n_step_records": 57.75, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.45559593023255807, "calib/avg_num_step_conf": 7.39453125, "calib/ece": 0.2782520325203252, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.06504065040650407, "calib/gap": -0.04196366279069774, "calib/mean_conf": 0.34264227642276424, "calib/mu_c": 0.3153488372093023, "calib/mu_w": 0.35731250000000003, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.13565040650406504, "calib/std_conf": 0.2758289540117945, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2426.0, "completions/max_terminated_length": 2426.0, "completions/mean_length": 372.76953125, "completions/mean_terminated_length": 374.23138427734375, "completions/min_length": 0.0, "completions/min_terminated_length": 135.0, "epoch": 0.050133333333333335, "grad_norm": 0.22952257096767426, "learning_rate": 4.25e-06, "loss": 0.0817, "num_tokens": 10460691.0, "reward": 0.9732900261878967, "reward_std": 0.3508404493331909, "rewards/accuracy_reward_step": 0.3359375, "rewards/final_brier_reward_step": 0.6431602239608765, "rewards/format_reward_step": 0.953125, "step": 47 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -4.0490021446792923e-07, "aux_brier/mean_group_std": 0.06687772576759139, "aux_brier/mean_r": 0.9495411315696214, "aux_brier/n_active_tok": 200.25, "aux_brier/n_groups": 10.40625, "aux_brier/n_step_records": 50.0625, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.39001855287569576, "calib/avg_num_step_conf": 6.3046875, "calib/ece": 0.27710317460317463, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.027777777777777776, "calib/gap": -0.07735584415584418, "calib/mean_conf": 0.2681349206349206, "calib/mu_c": 0.2144155844155844, "calib/mu_w": 0.29177142857142857, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.11984126984126983, "calib/std_conf": 0.23697015595280196, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1802.0, "completions/max_terminated_length": 1802.0, "completions/mean_length": 327.828125, "completions/mean_terminated_length": 329.1137390136719, "completions/min_length": 0.0, "completions/min_terminated_length": 97.0, "epoch": 0.0512, "grad_norm": 0.18228919804096222, "learning_rate": 4.222222222222223e-06, "loss": 0.0133, "num_tokens": 10648303.0, "reward": 0.9566383361816406, "reward_std": 0.31275951862335205, "rewards/accuracy_reward_step": 0.30078125, "rewards/final_brier_reward_step": 0.6781160235404968, "rewards/format_reward_step": 0.97265625, "step": 48 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 1.0355335841594204e-07, "aux_brier/mean_group_std": 0.07027406013126304, "aux_brier/mean_r": 0.9404911111550787, "aux_brier/n_active_tok": 221.125, "aux_brier/n_groups": 13.5, "aux_brier/n_step_records": 55.28125, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5274584929757344, "calib/avg_num_step_conf": 6.94921875, "calib/ece": 0.25136546184738956, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0321285140562249, "calib/gap": -0.004244359301830591, "calib/mean_conf": 0.24839357429718875, "calib/mu_c": 0.24563218390804598, "calib/mu_w": 0.24987654320987657, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.07518072289156627, "calib/std_conf": 0.23972002938601275, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2439.0, "completions/max_terminated_length": 2439.0, "completions/mean_length": 354.859375, "completions/mean_terminated_length": 354.859375, "completions/min_length": 37.0, "completions/min_terminated_length": 37.0, "epoch": 0.05226666666666667, "grad_norm": 0.07151960581541061, "learning_rate": 4.194444444444445e-06, "loss": 0.0643, "num_tokens": 10843683.0, "reward": 0.9951850771903992, "reward_std": 0.3210059404373169, "rewards/accuracy_reward_step": 0.34375, "rewards/final_brier_reward_step": 0.6760527491569519, "rewards/format_reward_step": 0.96484375, "step": 49 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 9.784847908589356e-07, "aux_brier/mean_group_std": 0.055116995024101514, "aux_brier/mean_r": 0.9598924751937429, "aux_brier/n_active_tok": 224.75, "aux_brier/n_groups": 13.90625, "aux_brier/n_step_records": 56.1875, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.4138856280395774, "calib/avg_num_step_conf": 7.078125, "calib/ece": 0.24460040816326534, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.9453125, "calib/frac_conf_gt_0.9": 0.05714285714285714, "calib/gap": -0.038986609089384505, "calib/mean_conf": 0.2233995918367347, "calib/mu_c": 0.19507462686567167, "calib/mu_w": 0.23406123595505618, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.09726530612244898, "calib/std_conf": 0.2539838697269298, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2101.0, "completions/max_terminated_length": 2101.0, "completions/mean_length": 381.96484375, "completions/mean_terminated_length": 381.96484375, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 0.05333333333333334, "grad_norm": 0.08293453603982925, "learning_rate": 4.166666666666667e-06, "loss": 0.0643, "num_tokens": 11046826.0, "reward": 0.9112589955329895, "reward_std": 0.33396610617637634, "rewards/accuracy_reward_step": 0.26953125, "rewards/final_brier_reward_step": 0.6762858629226685, "rewards/format_reward_step": 0.9453125, "step": 50 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 7.984037968533642e-07, "aux_brier/mean_group_std": 0.05989052847027103, "aux_brier/mean_r": 0.9579583998545194, "aux_brier/n_active_tok": 228.625, "aux_brier/n_groups": 13.46875, "aux_brier/n_step_records": 57.15625, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5154973231896309, "calib/avg_num_step_conf": 7.265625, "calib/ece": 0.24108142292490115, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.043478260869565216, "calib/gap": 0.007696703296703272, "calib/mean_conf": 0.22200158102766795, "calib/mu_c": 0.2271428571428571, "calib/mu_w": 0.21944615384615382, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.06553359683794466, "calib/std_conf": 0.2472112133968055, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1084.0, "completions/max_terminated_length": 1084.0, "completions/mean_length": 360.72265625, "completions/mean_terminated_length": 362.13726806640625, "completions/min_length": 0.0, "completions/min_terminated_length": 87.0, "epoch": 0.0544, "grad_norm": 0.16797003149986267, "learning_rate": 4.138888888888889e-06, "loss": 0.007, "num_tokens": 11248467.0, "reward": 0.9927654266357422, "reward_std": 0.2776499092578888, "rewards/accuracy_reward_step": 0.328125, "rewards/final_brier_reward_step": 0.697624146938324, "rewards/format_reward_step": 0.98046875, "step": 51 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 1.3240418723636793e-06, "aux_brier/mean_group_std": 0.05680214748533047, "aux_brier/mean_r": 0.9560514305138509, "aux_brier/n_active_tok": 206.375, "aux_brier/n_groups": 11.3125, "aux_brier/n_step_records": 51.59375, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4833976833976834, "calib/avg_num_step_conf": 6.5234375, "calib/ece": 0.3255098814229249, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.03162055335968379, "calib/gap": -0.03864414414414416, "calib/mean_conf": 0.16927272727272727, "calib/mu_c": 0.14666666666666667, "calib/mu_w": 0.18531081081081083, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.03988142292490118, "calib/std_conf": 0.2041630188551689, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1201.0, "completions/max_terminated_length": 1201.0, "completions/mean_length": 344.078125, "completions/mean_terminated_length": 345.4274597167969, "completions/min_length": 0.0, "completions/min_terminated_length": 121.0, "epoch": 0.055466666666666664, "grad_norm": 0.2156234234571457, "learning_rate": 4.111111111111111e-06, "loss": 0.0292, "num_tokens": 11444503.0, "reward": 1.0537123680114746, "reward_std": 0.3458700180053711, "rewards/accuracy_reward_step": 0.41015625, "rewards/final_brier_reward_step": 0.6210998296737671, "rewards/format_reward_step": 0.9765625, "step": 52 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 1.8243179855659264e-07, "aux_brier/mean_group_std": 0.0626040370378366, "aux_brier/mean_r": 0.9475969372039725, "aux_brier/n_active_tok": 245.5, "aux_brier/n_groups": 15.84375, "aux_brier/n_step_records": 61.375, "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.49686520376175547, "calib/avg_num_step_conf": 7.75, "calib/ece": 0.306639344262295, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.012295081967213115, "calib/gap": -0.0031083246255660113, "calib/mean_conf": 0.16204918032786889, "calib/mu_c": 0.1602020202020202, "calib/mu_w": 0.16331034482758622, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.031475409836065574, "calib/std_conf": 0.1878891961777253, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2851.0, "completions/max_terminated_length": 2851.0, "completions/mean_length": 425.61328125, "completions/mean_terminated_length": 425.61328125, "completions/min_length": 106.0, "completions/min_terminated_length": 106.0, "epoch": 0.05653333333333333, "grad_norm": 0.20086102187633514, "learning_rate": 4.083333333333334e-06, "loss": 0.064, "num_tokens": 11659284.0, "reward": 1.0182605981826782, "reward_std": 0.38351237773895264, "rewards/accuracy_reward_step": 0.390625, "rewards/final_brier_reward_step": 0.6277297139167786, "rewards/format_reward_step": 0.94140625, "step": 53 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -4.044353998762518e-06, "aux_brier/mean_group_std": 0.07300635731622351, "aux_brier/mean_r": 0.9530345758029888, "aux_brier/n_active_tok": 220.875, "aux_brier/n_groups": 13.0625, "aux_brier/n_step_records": 55.21875, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.49387254901960786, "calib/avg_num_step_conf": 7.00390625, "calib/ece": 0.3587075098814229, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.007905138339920948, "calib/gap": -0.005667546505781773, "calib/mean_conf": 0.12295256916996049, "calib/mu_c": 0.11990598290598292, "calib/mu_w": 0.1255735294117647, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.00960474308300395, "calib/std_conf": 0.14050042452540257, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1032.0, "completions/max_terminated_length": 1032.0, "completions/mean_length": 349.11328125, "completions/mean_terminated_length": 349.11328125, "completions/min_length": 85.0, "completions/min_terminated_length": 85.0, "epoch": 0.0576, "grad_norm": 0.32629361748695374, "learning_rate": 4.055555555555556e-06, "loss": -0.0086, "num_tokens": 11854889.0, "reward": 1.1027724742889404, "reward_std": 0.30096855759620667, "rewards/accuracy_reward_step": 0.45703125, "rewards/final_brier_reward_step": 0.6064023375511169, "rewards/format_reward_step": 0.98828125, "step": 54 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -1.1379246602216675e-06, "aux_brier/mean_group_std": 0.05071299854557984, "aux_brier/mean_r": 0.9610989423086578, "aux_brier/n_active_tok": 240.0, "aux_brier/n_groups": 14.75, "aux_brier/n_step_records": 60.0, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.44242015294646875, "calib/avg_num_step_conf": 7.58203125, "calib/ece": 0.2533530120481928, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.01606425702811245, "calib/gap": -0.007733040935672536, "calib/mean_conf": 0.13339397590361446, "calib/mu_c": 0.12808333333333333, "calib/mu_w": 0.13581637426900586, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.036746987951807225, "calib/std_conf": 0.17100283894592416, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2919.0, "completions/max_terminated_length": 2919.0, "completions/mean_length": 394.859375, "completions/mean_terminated_length": 394.859375, "completions/min_length": 93.0, "completions/min_terminated_length": 93.0, "epoch": 0.058666666666666666, "grad_norm": 0.1013939380645752, "learning_rate": 4.027777777777779e-06, "loss": 0.0937, "num_tokens": 12063797.0, "reward": 0.9612206816673279, "reward_std": 0.2767617404460907, "rewards/accuracy_reward_step": 0.30859375, "rewards/final_brier_reward_step": 0.6886326670646667, "rewards/format_reward_step": 0.9609375, "step": 55 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 4.685081207433939e-07, "aux_brier/mean_group_std": 0.05100227364623314, "aux_brier/mean_r": 0.9606296541145303, "aux_brier/n_active_tok": 242.75, "aux_brier/n_groups": 14.75, "aux_brier/n_step_records": 60.6875, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.4962231489495633, "calib/avg_num_step_conf": 7.68359375, "calib/ece": 0.229168, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.012, "calib/gap": -0.017303328349988184, "calib/mean_conf": 0.100192, "calib/mu_c": 0.08780281690140847, "calib/mu_w": 0.10510614525139665, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.02268, "calib/std_conf": 0.1443027759123157, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2414.0, "completions/max_terminated_length": 2414.0, "completions/mean_length": 398.92578125, "completions/mean_terminated_length": 398.92578125, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 0.05973333333333333, "grad_norm": 0.03072967194020748, "learning_rate": 4.000000000000001e-06, "loss": 0.0337, "num_tokens": 12272762.0, "reward": 0.9499537944793701, "reward_std": 0.25985878705978394, "rewards/accuracy_reward_step": 0.28515625, "rewards/final_brier_reward_step": 0.7138776183128357, "rewards/format_reward_step": 0.97265625, "step": 56 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -6.040157607412056e-07, "aux_brier/mean_group_std": 0.04829101798079878, "aux_brier/mean_r": 0.9699569409987028, "aux_brier/n_active_tok": 239.375, "aux_brier/n_groups": 16.5625, "aux_brier/n_step_records": 59.84375, "calib/answer_extract_rate": 0.95703125, "calib/auroc": 0.5027433448486079, "calib/avg_num_step_conf": 7.5546875, "calib/ece": 0.39521311475409826, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.020491803278688523, "calib/gap": -0.0017990923254080915, "calib/mean_conf": 0.08937704918032786, "calib/mu_c": 0.0883963963963964, "calib/mu_w": 0.0901954887218045, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.014836065573770494, "calib/std_conf": 0.14308247809859057, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3072.0, "completions/max_terminated_length": 3072.0, "completions/mean_length": 414.01953125, "completions/mean_terminated_length": 415.6431579589844, "completions/min_length": 0.0, "completions/min_terminated_length": 19.0, "epoch": 0.0608, "grad_norm": 0.33087193965911865, "learning_rate": 3.972222222222223e-06, "loss": 0.0824, "num_tokens": 12485543.0, "reward": 1.0536420345306396, "reward_std": 0.32177644968032837, "rewards/accuracy_reward_step": 0.4375, "rewards/final_brier_reward_step": 0.566131055355072, "rewards/format_reward_step": 0.94921875, "step": 57 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -1.3840898555605996e-06, "aux_brier/mean_group_std": 0.042049013825433725, "aux_brier/mean_r": 0.9660714632088365, "aux_brier/n_active_tok": 296.875, "aux_brier/n_groups": 23.40625, "aux_brier/n_step_records": 74.21875, "calib/answer_extract_rate": 0.94921875, "calib/auroc": 0.4646460503650234, "calib/avg_num_step_conf": 9.4765625, "calib/ece": 0.26274166666666665, "calib/final_conf_rate": 0.9375, "calib/format_rate": 0.921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.025771962923468117, "calib/mean_conf": 0.08450833333333334, "calib/mu_c": 0.06657534246575343, "calib/mu_w": 0.09234730538922155, "calib/nonempty_final_conf_rate": 0.9375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.021541666666666664, "calib/std_conf": 0.10103291508491456, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2606.0, "completions/max_terminated_length": 2606.0, "completions/mean_length": 494.7265625, "completions/mean_terminated_length": 496.66668701171875, "completions/min_length": 0.0, "completions/min_terminated_length": 144.0, "epoch": 0.06186666666666667, "grad_norm": 0.24724014103412628, "learning_rate": 3.944444444444445e-06, "loss": 0.1501, "num_tokens": 12718513.0, "reward": 0.9154220819473267, "reward_std": 0.31696373224258423, "rewards/accuracy_reward_step": 0.2890625, "rewards/final_brier_reward_step": 0.6616881489753723, "rewards/format_reward_step": 0.921875, "step": 58 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -1.0737893708923085e-06, "aux_brier/mean_group_std": 0.04600677352950576, "aux_brier/mean_r": 0.9679426185910952, "aux_brier/n_active_tok": 244.0, "aux_brier/n_groups": 16.0625, "aux_brier/n_step_records": 61.0, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5842910551093881, "calib/avg_num_step_conf": 7.66796875, "calib/ece": 0.31970281124498, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.004016064257028112, "calib/gap": -0.0007900166158958943, "calib/mean_conf": 0.07194377510040162, "calib/mu_c": 0.07144565217391304, "calib/mu_w": 0.07223566878980893, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.011084337349397589, "calib/std_conf": 0.0969266110162633, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2307.0, "completions/max_terminated_length": 2307.0, "completions/mean_length": 425.6796875, "completions/mean_terminated_length": 425.6796875, "completions/min_length": 63.0, "completions/min_terminated_length": 63.0, "epoch": 0.06293333333333333, "grad_norm": 0.08634313941001892, "learning_rate": 3.916666666666667e-06, "loss": 0.0597, "num_tokens": 12933735.0, "reward": 1.0083181858062744, "reward_std": 0.2918752133846283, "rewards/accuracy_reward_step": 0.359375, "rewards/final_brier_reward_step": 0.6504605412483215, "rewards/format_reward_step": 0.97265625, "step": 59 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -4.6949528880180225e-06, "aux_brier/mean_group_std": 0.03616843391882674, "aux_brier/mean_r": 0.9745536707640898, "aux_brier/n_active_tok": 260.625, "aux_brier/n_groups": 18.9375, "aux_brier/n_step_records": 65.15625, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.46282598749428094, "calib/avg_num_step_conf": 8.30078125, "calib/ece": 0.26178775510204083, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.004081632653061225, "calib/gap": -0.008513268262925117, "calib/mean_conf": 0.07274285714285715, "calib/mu_c": 0.0669746835443038, "calib/mu_w": 0.07548795180722892, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0060408163265306125, "calib/std_conf": 0.08467365443337166, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2996.0, "completions/max_terminated_length": 2996.0, "completions/mean_length": 440.3203125, "completions/mean_terminated_length": 440.3203125, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 0.064, "grad_norm": 0.017184965312480927, "learning_rate": 3.88888888888889e-06, "loss": 0.1188, "num_tokens": 13155313.0, "reward": 0.9585400819778442, "reward_std": 0.31010979413986206, "rewards/accuracy_reward_step": 0.31640625, "rewards/final_brier_reward_step": 0.6700977087020874, "rewards/format_reward_step": 0.94921875, "step": 60 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 7.387941521885821e-07, "aux_brier/mean_group_std": 0.052413984962238584, "aux_brier/mean_r": 0.9652360239111484, "aux_brier/n_active_tok": 221.75, "aux_brier/n_groups": 15.0, "aux_brier/n_step_records": 55.4375, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.4169480519480519, "calib/avg_num_step_conf": 6.984375, "calib/ece": 0.39087599999999995, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.014322077922077923, "calib/mean_conf": 0.052084000000000005, "calib/mu_c": 0.04406363636363636, "calib/mu_w": 0.058385714285714284, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.00148, "calib/std_conf": 0.04510180643832351, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2342.0, "completions/max_terminated_length": 2342.0, "completions/mean_length": 371.046875, "completions/mean_terminated_length": 371.046875, "completions/min_length": 101.0, "completions/min_terminated_length": 101.0, "epoch": 0.06506666666666666, "grad_norm": 0.10837318748235703, "learning_rate": 3.861111111111112e-06, "loss": 0.0767, "num_tokens": 13354365.0, "reward": 1.0542142391204834, "reward_std": 0.2764996886253357, "rewards/accuracy_reward_step": 0.4296875, "rewards/final_brier_reward_step": 0.5684198141098022, "rewards/format_reward_step": 0.96484375, "step": 61 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -2.868187364002406e-07, "aux_brier/mean_group_std": 0.04381141609160259, "aux_brier/mean_r": 0.9716618393481739, "aux_brier/n_active_tok": 263.25, "aux_brier/n_groups": 21.09375, "aux_brier/n_step_records": 65.8125, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.4961106309420916, "calib/avg_num_step_conf": 8.4921875, "calib/ece": 0.3216856326530612, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.9453125, "calib/frac_conf_gt_0.9": 0.004081632653061225, "calib/gap": -0.012113928262748486, "calib/mean_conf": 0.0677021224489796, "calib/mu_c": 0.059988764044943826, "calib/mu_w": 0.07210269230769231, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.013061224489795919, "calib/std_conf": 0.10893542482040143, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2354.0, "completions/max_terminated_length": 2354.0, "completions/mean_length": 471.04296875, "completions/mean_terminated_length": 471.04296875, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 0.06613333333333334, "grad_norm": 0.0680285394191742, "learning_rate": 3.833333333333334e-06, "loss": 0.1161, "num_tokens": 13582032.0, "reward": 0.9762334227561951, "reward_std": 0.34210383892059326, "rewards/accuracy_reward_step": 0.34765625, "rewards/final_brier_reward_step": 0.6236836910247803, "rewards/format_reward_step": 0.9453125, "step": 62 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 6.40636866733324e-06, "aux_brier/mean_group_std": 0.05836626167407911, "aux_brier/mean_r": 0.960481865817939, "aux_brier/n_active_tok": 242.0, "aux_brier/n_groups": 14.84375, "aux_brier/n_step_records": 60.5, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.46469387755102043, "calib/avg_num_step_conf": 7.89453125, "calib/ece": 0.3517943548387097, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.004032258064516129, "calib/gap": -0.01288421768707481, "calib/mean_conf": 0.05941532258064517, "calib/mu_c": 0.051622448979591846, "calib/mu_w": 0.06450666666666666, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.008024193548387095, "calib/std_conf": 0.0885411837329467, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2944.0, "completions/max_terminated_length": 2944.0, "completions/mean_length": 445.30078125, "completions/mean_terminated_length": 448.8070983886719, "completions/min_length": 0.0, "completions/min_terminated_length": 125.0, "epoch": 0.0672, "grad_norm": 0.5678281188011169, "learning_rate": 3.8055555555555556e-06, "loss": 0.0613, "num_tokens": 13804669.0, "reward": 1.012493371963501, "reward_std": 0.34109848737716675, "rewards/accuracy_reward_step": 0.3828125, "rewards/final_brier_reward_step": 0.6046613454818726, "rewards/format_reward_step": 0.95703125, "step": 63 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -6.058569844147321e-06, "aux_brier/mean_group_std": 0.033108966515736575, "aux_brier/mean_r": 0.9747501982796533, "aux_brier/n_active_tok": 253.0, "aux_brier/n_groups": 17.96875, "aux_brier/n_step_records": 63.25, "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.46250336292709177, "calib/avg_num_step_conf": 8.15625, "calib/ece": 0.44395368852459016, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.004098360655737705, "calib/gap": -0.011361864406779644, "calib/mean_conf": 0.05350532786885247, "calib/mu_c": 0.04763813559322035, "calib/mu_w": 0.059, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.006926229508196722, "calib/std_conf": 0.07581863560102291, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2956.0, "completions/max_terminated_length": 2956.0, "completions/mean_length": 444.5078125, "completions/mean_terminated_length": 446.2510070800781, "completions/min_length": 0.0, "completions/min_terminated_length": 132.0, "epoch": 0.06826666666666667, "grad_norm": 0.09891669452190399, "learning_rate": 3.777777777777778e-06, "loss": 0.0667, "num_tokens": 14022239.0, "reward": 1.0685046911239624, "reward_std": 0.32946476340293884, "rewards/accuracy_reward_step": 0.46875, "rewards/final_brier_reward_step": 0.5162064433097839, "rewards/format_reward_step": 0.94140625, "step": 64 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -2.775143360556065e-06, "aux_brier/mean_group_std": 0.03827895670426919, "aux_brier/mean_r": 0.9771603448523676, "aux_brier/n_active_tok": 206.0, "aux_brier/n_groups": 13.0, "aux_brier/n_step_records": 51.5, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.569016290726817, "calib/avg_num_step_conf": 6.640625, "calib/ece": 0.41270861767322836, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.010049043442731838, "calib/mean_conf": 0.03611027996456693, "calib/mu_c": 0.04164912280701755, "calib/mu_w": 0.03160007936428571, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.034171112056635054, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2107.0, "completions/max_terminated_length": 2107.0, "completions/mean_length": 355.828125, "completions/mean_terminated_length": 355.828125, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 0.06933333333333333, "grad_norm": 0.033202022314071655, "learning_rate": 3.7500000000000005e-06, "loss": 0.0187, "num_tokens": 14218355.0, "reward": 1.0857701301574707, "reward_std": 0.20154376327991486, "rewards/accuracy_reward_step": 0.44921875, "rewards/final_brier_reward_step": 0.5774555206298828, "rewards/format_reward_step": 0.984375, "step": 65 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -1.3306250042574574e-06, "aux_brier/mean_group_std": 0.04320104418073151, "aux_brier/mean_r": 0.9743550899357948, "aux_brier/n_active_tok": 265.875, "aux_brier/n_groups": 17.1875, "aux_brier/n_step_records": 66.46875, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5000348383500557, "calib/avg_num_step_conf": 8.51953125, "calib/ece": 0.325970564516129, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.003624275362318838, "calib/mean_conf": 0.04499717741935485, "calib/mu_c": 0.042717391304347825, "calib/mu_w": 0.04634166666666666, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.04443168496838503, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2883.0, "completions/max_terminated_length": 2883.0, "completions/mean_length": 462.66796875, "completions/mean_terminated_length": 468.1541748046875, "completions/min_length": 0.0, "completions/min_terminated_length": 96.0, "epoch": 0.0704, "grad_norm": 0.3320234417915344, "learning_rate": 3.7222222222222225e-06, "loss": 0.0577, "num_tokens": 14443150.0, "reward": 0.9998792409896851, "reward_std": 0.28078022599220276, "rewards/accuracy_reward_step": 0.359375, "rewards/final_brier_reward_step": 0.6323295831680298, "rewards/format_reward_step": 0.96484375, "step": 66 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -2.064743734445096e-06, "aux_brier/mean_group_std": 0.04949975251528722, "aux_brier/mean_r": 0.9677108378406305, "aux_brier/n_active_tok": 236.375, "aux_brier/n_groups": 15.5625, "aux_brier/n_step_records": 59.09375, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.502158273381295, "calib/avg_num_step_conf": 7.41796875, "calib/ece": 0.398421686746988, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0009283845650752201, "calib/mean_conf": 0.043345381526104426, "calib/mu_c": 0.04386363636363637, "calib/mu_w": 0.04293525179856115, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.04063345295135549, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3007.0, "completions/max_terminated_length": 3007.0, "completions/mean_length": 435.51953125, "completions/mean_terminated_length": 435.51953125, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 0.07146666666666666, "grad_norm": 0.05349089205265045, "learning_rate": 3.694444444444445e-06, "loss": 0.0561, "num_tokens": 14659651.0, "reward": 1.0583751201629639, "reward_std": 0.26450416445732117, "rewards/accuracy_reward_step": 0.43359375, "rewards/final_brier_reward_step": 0.5694377422332764, "rewards/format_reward_step": 0.96484375, "step": 67 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 8.650902338147581e-06, "aux_brier/mean_group_std": 0.03427101187295996, "aux_brier/mean_r": 0.9768078425432928, "aux_brier/n_active_tok": 204.5, "aux_brier/n_groups": 12.875, "aux_brier/n_step_records": 51.125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.46068627450980393, "calib/avg_num_step_conf": 6.5703125, "calib/ece": 0.36509486166007904, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.007049019607843134, "calib/mean_conf": 0.04676284584980237, "calib/mu_c": 0.0425, "calib/mu_w": 0.04954901960784314, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.008300395256916995, "calib/std_conf": 0.08306335385689934, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2545.0, "completions/max_terminated_length": 2545.0, "completions/mean_length": 379.18359375, "completions/mean_terminated_length": 379.18359375, "completions/min_length": 57.0, "completions/min_terminated_length": 57.0, "epoch": 0.07253333333333334, "grad_norm": 0.48980000615119934, "learning_rate": 3.6666666666666666e-06, "loss": 0.0623, "num_tokens": 14860810.0, "reward": 1.0373156070709229, "reward_std": 0.25164324045181274, "rewards/accuracy_reward_step": 0.390625, "rewards/final_brier_reward_step": 0.6180124282836914, "rewards/format_reward_step": 0.984375, "step": 68 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -2.078681342526645e-06, "aux_brier/mean_group_std": 0.05264741637255274, "aux_brier/mean_r": 0.9663651153189144, "aux_brier/n_active_tok": 215.125, "aux_brier/n_groups": 13.15625, "aux_brier/n_step_records": 53.78125, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5235158862876255, "calib/avg_num_step_conf": 6.765625, "calib/ece": 0.3296016129032258, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0007544035674470484, "calib/mean_conf": 0.04459193548387098, "calib/mu_c": 0.044117391304347824, "calib/mu_w": 0.04487179487179487, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0016129032258064516, "calib/std_conf": 0.0434310235422943, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2472.0, "completions/max_terminated_length": 2472.0, "completions/mean_length": 436.8203125, "completions/mean_terminated_length": 436.8203125, "completions/min_length": 7.0, "completions/min_terminated_length": 7.0, "epoch": 0.0736, "grad_norm": 0.05785338953137398, "learning_rate": 3.638888888888889e-06, "loss": 0.0771, "num_tokens": 15077132.0, "reward": 1.0069888830184937, "reward_std": 0.3025099039077759, "rewards/accuracy_reward_step": 0.36328125, "rewards/final_brier_reward_step": 0.6373307704925537, "rewards/format_reward_step": 0.96875, "step": 69 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -3.0532646641545647e-06, "aux_brier/mean_group_std": 0.039275487480210904, "aux_brier/mean_r": 0.9749847672449794, "aux_brier/n_active_tok": 231.0, "aux_brier/n_groups": 15.3125, "aux_brier/n_step_records": 57.75, "calib/answer_extract_rate": 0.94140625, "calib/auroc": 0.5602575231481481, "calib/avg_num_step_conf": 7.3359375, "calib/ece": 0.36384333333333335, "calib/final_conf_rate": 0.9375, "calib/format_rate": 0.93359375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.011329166666666675, "calib/mean_conf": 0.03615666666666668, "calib/mu_c": 0.042954166666666675, "calib/mu_w": 0.031625, "calib/nonempty_final_conf_rate": 0.9375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.04059276769190404, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2475.0, "completions/max_terminated_length": 2475.0, "completions/mean_length": 463.0703125, "completions/mean_terminated_length": 466.7165222167969, "completions/min_length": 0.0, "completions/min_terminated_length": 67.0, "epoch": 0.07466666666666667, "grad_norm": 0.02348187379539013, "learning_rate": 3.6111111111111115e-06, "loss": 0.0838, "num_tokens": 15302670.0, "reward": 0.9888066053390503, "reward_std": 0.2804641127586365, "rewards/accuracy_reward_step": 0.375, "rewards/final_brier_reward_step": 0.5880390405654907, "rewards/format_reward_step": 0.93359375, "step": 70 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 2.4394772646374463e-08, "aux_brier/mean_group_std": 0.04867247399872675, "aux_brier/mean_r": 0.9634075252441048, "aux_brier/n_active_tok": 231.625, "aux_brier/n_groups": 14.71875, "aux_brier/n_step_records": 57.90625, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.4202279202279202, "calib/avg_num_step_conf": 7.4765625, "calib/ece": 0.33001343873517786, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.003952569169960474, "calib/gap": -0.011526427893094562, "calib/mean_conf": 0.039314624505928854, "calib/mu_c": 0.03193406593406593, "calib/mu_w": 0.043460493827160496, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.004822134387351778, "calib/std_conf": 0.06989159069698907, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1896.0, "completions/max_terminated_length": 1896.0, "completions/mean_length": 422.97265625, "completions/mean_terminated_length": 426.30316162109375, "completions/min_length": 0.0, "completions/min_terminated_length": 111.0, "epoch": 0.07573333333333333, "grad_norm": 0.8687294125556946, "learning_rate": 3.5833333333333335e-06, "loss": 0.0305, "num_tokens": 15515359.0, "reward": 0.9972602725028992, "reward_std": 0.2537091076374054, "rewards/accuracy_reward_step": 0.35546875, "rewards/final_brier_reward_step": 0.6296659708023071, "rewards/format_reward_step": 0.96875, "step": 71 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -5.560168510720942e-06, "aux_brier/mean_group_std": 0.02920832451230697, "aux_brier/mean_r": 0.9761008591631589, "aux_brier/n_active_tok": 246.25, "aux_brier/n_groups": 16.15625, "aux_brier/n_step_records": 61.5625, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5556539420175783, "calib/avg_num_step_conf": 7.73046875, "calib/ece": 0.3620662450592885, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.003952569169960474, "calib/gap": 0.009279480519480507, "calib/mean_conf": 0.038566166007905134, "calib/mu_c": 0.044214545454545444, "calib/mu_w": 0.03493506493506494, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0046640316205533595, "calib/std_conf": 0.08305488922652862, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3043.0, "completions/max_terminated_length": 3043.0, "completions/mean_length": 421.51171875, "completions/mean_terminated_length": 421.51171875, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 0.0768, "grad_norm": 0.30694445967674255, "learning_rate": 3.555555555555556e-06, "loss": 0.0423, "num_tokens": 15727674.0, "reward": 1.0391556024551392, "reward_std": 0.2421858310699463, "rewards/accuracy_reward_step": 0.390625, "rewards/final_brier_reward_step": 0.6253725290298462, "rewards/format_reward_step": 0.984375, "step": 72 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 8.052404001479552e-06, "aux_brier/mean_group_std": 0.04169540844016346, "aux_brier/mean_r": 0.9758179795916286, "aux_brier/n_active_tok": 227.875, "aux_brier/n_groups": 15.03125, "aux_brier/n_step_records": 56.96875, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5363523187052599, "calib/avg_num_step_conf": 7.26953125, "calib/ece": 0.5017786968503938, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.004129070339246813, "calib/mean_conf": 0.029717366141732288, "calib/mu_c": 0.03165185185185185, "calib/mu_w": 0.027522781512605034, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.023934978877244266, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2992.0, "completions/max_terminated_length": 2992.0, "completions/mean_length": 409.98828125, "completions/mean_terminated_length": 409.98828125, "completions/min_length": 108.0, "completions/min_terminated_length": 108.0, "epoch": 0.07786666666666667, "grad_norm": 0.05047299340367317, "learning_rate": 3.5277777777777784e-06, "loss": 0.043, "num_tokens": 15939663.0, "reward": 1.1476329565048218, "reward_std": 0.26514506340026855, "rewards/accuracy_reward_step": 0.52734375, "rewards/final_brier_reward_step": 0.4967818856239319, "rewards/format_reward_step": 0.9921875, "step": 73 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 6.184344703008549e-08, "aux_brier/mean_group_std": 0.023980905783726972, "aux_brier/mean_r": 0.9863200664382628, "aux_brier/n_active_tok": 241.375, "aux_brier/n_groups": 15.21875, "aux_brier/n_step_records": 60.34375, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.49262187088274045, "calib/avg_num_step_conf": 7.55859375, "calib/ece": 0.4146693548387097, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00381159420289855, "calib/mean_conf": 0.028879032258064517, "calib/mu_c": 0.031, "calib/mu_w": 0.02718840579710145, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.03330530635381605, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2533.0, "completions/max_terminated_length": 2533.0, "completions/mean_length": 430.07421875, "completions/mean_terminated_length": 430.07421875, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 0.07893333333333333, "grad_norm": 0.2849559485912323, "learning_rate": 3.5e-06, "loss": 0.0786, "num_tokens": 16153690.0, "reward": 1.0550175905227661, "reward_std": 0.25433430075645447, "rewards/accuracy_reward_step": 0.4296875, "rewards/final_brier_reward_step": 0.5638206005096436, "rewards/format_reward_step": 0.96875, "step": 74 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -6.103603219287379e-06, "aux_brier/mean_group_std": 0.05549154691148288, "aux_brier/mean_r": 0.9642726666882724, "aux_brier/n_active_tok": 216.375, "aux_brier/n_groups": 11.59375, "aux_brier/n_step_records": 54.09375, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6524572445800751, "calib/avg_num_step_conf": 6.82421875, "calib/ece": 0.5501291338582676, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.011855909466590379, "calib/mean_conf": 0.028611023622047243, "calib/mu_c": 0.03360544217687075, "calib/mu_w": 0.021749532710280375, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.02749721597459955, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1224.0, "completions/max_terminated_length": 1224.0, "completions/mean_length": 369.51953125, "completions/mean_terminated_length": 370.9686584472656, "completions/min_length": 0.0, "completions/min_terminated_length": 103.0, "epoch": 0.08, "grad_norm": 0.9460354447364807, "learning_rate": 3.4722222222222224e-06, "loss": -0.0031, "num_tokens": 16353039.0, "reward": 1.1918749809265137, "reward_std": 0.24501259624958038, "rewards/accuracy_reward_step": 0.58203125, "rewards/final_brier_reward_step": 0.455000102519989, "rewards/format_reward_step": 0.9921875, "step": 75 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -2.4513692827854427e-06, "aux_brier/mean_group_std": 0.051347752775991504, "aux_brier/mean_r": 0.9641691588949147, "aux_brier/n_active_tok": 246.375, "aux_brier/n_groups": 16.5, "aux_brier/n_step_records": 61.59375, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.480296343001261, "calib/avg_num_step_conf": 7.69921875, "calib/ece": 0.45480158730158726, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.007307692307692317, "calib/mean_conf": 0.03376984126984127, "calib/mu_c": 0.029999999999999995, "calib/mu_w": 0.03730769230769231, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0022222222222222222, "calib/std_conf": 0.042403889964268485, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2782.0, "completions/max_terminated_length": 2782.0, "completions/mean_length": 436.00390625, "completions/mean_terminated_length": 436.00390625, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 0.08106666666666666, "grad_norm": 0.18404695391654968, "learning_rate": 3.444444444444445e-06, "loss": 0.0844, "num_tokens": 16567712.0, "reward": 1.099198818206787, "reward_std": 0.2967877984046936, "rewards/accuracy_reward_step": 0.4765625, "rewards/final_brier_reward_step": 0.5296074151992798, "rewards/format_reward_step": 0.98046875, "step": 76 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -2.9130282626355086e-06, "aux_brier/mean_group_std": 0.05058887783171625, "aux_brier/mean_r": 0.9618940982608526, "aux_brier/n_active_tok": 218.125, "aux_brier/n_groups": 13.71875, "aux_brier/n_step_records": 54.53125, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4209269752339721, "calib/avg_num_step_conf": 6.84375, "calib/ece": 0.4193650793650794, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.004851340166804614, "calib/mean_conf": 0.029047619047619048, "calib/mu_c": 0.0263716814159292, "calib/mu_w": 0.031223021582733813, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.033044668456177394, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2614.0, "completions/max_terminated_length": 2614.0, "completions/mean_length": 388.171875, "completions/mean_terminated_length": 389.69415283203125, "completions/min_length": 0.0, "completions/min_terminated_length": 108.0, "epoch": 0.08213333333333334, "grad_norm": 0.07342797517776489, "learning_rate": 3.416666666666667e-06, "loss": 0.0574, "num_tokens": 16771748.0, "reward": 1.0746798515319824, "reward_std": 0.28718793392181396, "rewards/accuracy_reward_step": 0.44140625, "rewards/final_brier_reward_step": 0.5643445253372192, "rewards/format_reward_step": 0.984375, "step": 77 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 5.696898912166404e-06, "aux_brier/mean_group_std": 0.046484346640904464, "aux_brier/mean_r": 0.970809476392736, "aux_brier/n_active_tok": 233.5, "aux_brier/n_groups": 14.03125, "aux_brier/n_step_records": 58.375, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5840215095064336, "calib/avg_num_step_conf": 7.375, "calib/ece": 0.479112, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0068533384546443955, "calib/mean_conf": 0.028887999999999997, "calib/mu_c": 0.03225984251968504, "calib/mu_w": 0.025406504065040646, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.0, "calib/std_conf": 0.024696952362589196, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2616.0, "completions/max_terminated_length": 2616.0, "completions/mean_length": 468.4765625, "completions/mean_terminated_length": 468.4765625, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 0.0832, "grad_norm": 0.2748223543167114, "learning_rate": 3.3888888888888893e-06, "loss": 0.0472, "num_tokens": 16999702.0, "reward": 1.1009531021118164, "reward_std": 0.3101786971092224, "rewards/accuracy_reward_step": 0.5, "rewards/final_brier_reward_step": 0.497562050819397, "rewards/format_reward_step": 0.953125, "step": 78 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 2.572590723781687e-07, "aux_brier/mean_group_std": 0.04789018717446725, "aux_brier/mean_r": 0.9645207600269317, "aux_brier/n_active_tok": 249.0, "aux_brier/n_groups": 15.5, "aux_brier/n_step_records": 62.25, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5566919191919192, "calib/avg_num_step_conf": 7.85546875, "calib/ece": 0.49617857142857147, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00297045454545454, "calib/mean_conf": 0.027630952380952385, "calib/mu_c": 0.029045454545454544, "calib/mu_w": 0.026075000000000004, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.020211449545833277, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2454.0, "completions/max_terminated_length": 2454.0, "completions/mean_length": 482.28125, "completions/mean_terminated_length": 482.28125, "completions/min_length": 104.0, "completions/min_terminated_length": 104.0, "epoch": 0.08426666666666667, "grad_norm": 0.2128184586763382, "learning_rate": 3.3611111111111117e-06, "loss": 0.0467, "num_tokens": 17229542.0, "reward": 1.132199764251709, "reward_std": 0.28818950057029724, "rewards/accuracy_reward_step": 0.515625, "rewards/final_brier_reward_step": 0.49754947423934937, "rewards/format_reward_step": 0.984375, "step": 79 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -2.174135792287757e-06, "aux_brier/mean_group_std": 0.06495822288625715, "aux_brier/mean_r": 0.9570226080739604, "aux_brier/n_active_tok": 239.0, "aux_brier/n_groups": 15.15625, "aux_brier/n_step_records": 59.75, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5057889822595705, "calib/avg_num_step_conf": 7.47265625, "calib/ece": 0.4432283464566929, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00035107376283846636, "calib/mean_conf": 0.025275590551181105, "calib/mu_c": 0.02546218487394958, "calib/mu_w": 0.025111111111111115, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.019715566880187718, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2224.0, "completions/max_terminated_length": 2224.0, "completions/mean_length": 393.703125, "completions/mean_terminated_length": 393.703125, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 0.08533333333333333, "grad_norm": 0.45626622438430786, "learning_rate": 3.3333333333333333e-06, "loss": 0.0332, "num_tokens": 17432490.0, "reward": 1.0984364748001099, "reward_std": 0.2826603353023529, "rewards/accuracy_reward_step": 0.46484375, "rewards/final_brier_reward_step": 0.5499960780143738, "rewards/format_reward_step": 0.9921875, "step": 80 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 3.332123914540075e-06, "aux_brier/mean_group_std": 0.05346998538001507, "aux_brier/mean_r": 0.9647629380884053, "aux_brier/n_active_tok": 250.5, "aux_brier/n_groups": 15.625, "aux_brier/n_step_records": 62.625, "calib/answer_extract_rate": 0.95703125, "calib/auroc": 0.5023799946366317, "calib/avg_num_step_conf": 8.0625, "calib/ece": 0.44011428571428574, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.004081632653061225, "calib/gap": -0.007569455618128187, "calib/mean_conf": 0.031069387755102044, "calib/mu_c": 0.026991150442477876, "calib/mu_w": 0.03456060606060606, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.004979591836734694, "calib/std_conf": 0.06577028112466142, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2342.0, "completions/max_terminated_length": 2342.0, "completions/mean_length": 459.61328125, "completions/mean_terminated_length": 463.2322692871094, "completions/min_length": 0.0, "completions/min_terminated_length": 147.0, "epoch": 0.0864, "grad_norm": 0.2887077331542969, "learning_rate": 3.3055555555555558e-06, "loss": 0.077, "num_tokens": 17656399.0, "reward": 1.0515084266662598, "reward_std": 0.2824578285217285, "rewards/accuracy_reward_step": 0.44140625, "rewards/final_brier_reward_step": 0.5341585874557495, "rewards/format_reward_step": 0.953125, "step": 81 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -4.6703866285469076e-06, "aux_brier/mean_group_std": 0.03935703469611457, "aux_brier/mean_r": 0.9688550529060806, "aux_brier/n_active_tok": 224.0, "aux_brier/n_groups": 14.5625, "aux_brier/n_step_records": 56.0, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5902911376170322, "calib/avg_num_step_conf": 7.01953125, "calib/ece": 0.5311908366533865, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.004536885981787867, "calib/mean_conf": 0.019406772908366534, "calib/mu_c": 0.021449275362318842, "calib/mu_w": 0.016912389380530975, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.00039840637450199205, "calib/std_conf": 0.015356439464022069, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2759.0, "completions/max_terminated_length": 2759.0, "completions/mean_length": 417.1796875, "completions/mean_terminated_length": 417.1796875, "completions/min_length": 80.0, "completions/min_terminated_length": 80.0, "epoch": 0.08746666666666666, "grad_norm": 0.08206015080213547, "learning_rate": 3.277777777777778e-06, "loss": 0.0486, "num_tokens": 17868749.0, "reward": 1.1433264017105103, "reward_std": 0.2854713201522827, "rewards/accuracy_reward_step": 0.5390625, "rewards/final_brier_reward_step": 0.46393078565597534, "rewards/format_reward_step": 0.9765625, "step": 82 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 1.3666570329196048e-05, "aux_brier/mean_group_std": 0.05717790892446513, "aux_brier/mean_r": 0.9562057854616453, "aux_brier/n_active_tok": 251.75, "aux_brier/n_groups": 14.78125, "aux_brier/n_step_records": 62.9375, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.46120606875360093, "calib/avg_num_step_conf": 7.87109375, "calib/ece": 0.48732000000000003, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.002982523526022667, "calib/mean_conf": 0.02068, "calib/mu_c": 0.019212598425196844, "calib/mu_w": 0.02219512195121951, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.01408323826397892, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2036.0, "completions/max_terminated_length": 2036.0, "completions/mean_length": 467.68359375, "completions/mean_terminated_length": 471.36614990234375, "completions/min_length": 0.0, "completions/min_terminated_length": 126.0, "epoch": 0.08853333333333334, "grad_norm": 0.28322744369506836, "learning_rate": 3.2500000000000002e-06, "loss": 0.0183, "num_tokens": 18095740.0, "reward": 1.1100877523422241, "reward_std": 0.2294122874736786, "rewards/accuracy_reward_step": 0.5, "rewards/final_brier_reward_step": 0.4950386583805084, "rewards/format_reward_step": 0.97265625, "step": 83 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 1.3964810212652168e-06, "aux_brier/mean_group_std": 0.05856871356778768, "aux_brier/mean_r": 0.9582298212054214, "aux_brier/n_active_tok": 225.625, "aux_brier/n_groups": 12.8125, "aux_brier/n_step_records": 56.40625, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5161845730027548, "calib/avg_num_step_conf": 7.0703125, "calib/ece": 0.4563083003952569, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 5.922865013774023e-05, "calib/mean_conf": 0.021952569169960474, "calib/mu_c": 0.021983471074380166, "calib/mu_w": 0.021924242424242426, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.01776787424143487, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2346.0, "completions/max_terminated_length": 2346.0, "completions/mean_length": 410.5625, "completions/mean_terminated_length": 410.5625, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 0.0896, "grad_norm": 0.006935984827578068, "learning_rate": 3.2222222222222227e-06, "loss": 0.0606, "num_tokens": 18306764.0, "reward": 1.1007013320922852, "reward_std": 0.2433372586965561, "rewards/accuracy_reward_step": 0.47265625, "rewards/final_brier_reward_step": 0.5356179475784302, "rewards/format_reward_step": 0.98828125, "step": 84 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -5.41343224375046e-06, "aux_brier/mean_group_std": 0.0658378648923139, "aux_brier/mean_r": 0.956898890554232, "aux_brier/n_active_tok": 251.375, "aux_brier/n_groups": 16.5625, "aux_brier/n_step_records": 62.84375, "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.5936186974789915, "calib/avg_num_step_conf": 8.078125, "calib/ece": 0.4594254655870446, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.004048582995951417, "calib/gap": 0.004857058167016801, "calib/mean_conf": 0.028995587044534414, "calib/mu_c": 0.031512605042016806, "calib/mu_w": 0.026655546875000005, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.003319838056680162, "calib/std_conf": 0.08079508902204688, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2880.0, "completions/max_terminated_length": 2880.0, "completions/mean_length": 468.23828125, "completions/mean_terminated_length": 471.9252014160156, "completions/min_length": 0.0, "completions/min_terminated_length": 110.0, "epoch": 0.09066666666666667, "grad_norm": 0.010801228694617748, "learning_rate": 3.1944444444444443e-06, "loss": 0.0932, "num_tokens": 18534457.0, "reward": 1.071960210800171, "reward_std": 0.23767264187335968, "rewards/accuracy_reward_step": 0.46484375, "rewards/final_brier_reward_step": 0.5144033432006836, "rewards/format_reward_step": 0.95703125, "step": 85 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 7.5741469434120745e-06, "aux_brier/mean_group_std": 0.059585150501863936, "aux_brier/mean_r": 0.9586120814670223, "aux_brier/n_active_tok": 232.875, "aux_brier/n_groups": 14.46875, "aux_brier/n_step_records": 58.21875, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.512226347458712, "calib/avg_num_step_conf": 7.27734375, "calib/ece": 0.4018181818181818, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0010203559083344013, "calib/mean_conf": 0.021897233201581028, "calib/mu_c": 0.02130841121495327, "calib/mu_w": 0.02232876712328767, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0003952569169960474, "calib/std_conf": 0.015126518878198464, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1749.0, "completions/max_terminated_length": 1749.0, "completions/mean_length": 438.4609375, "completions/mean_terminated_length": 440.180419921875, "completions/min_length": 0.0, "completions/min_terminated_length": 115.0, "epoch": 0.09173333333333333, "grad_norm": 0.1242508813738823, "learning_rate": 3.1666666666666667e-06, "loss": 0.0276, "num_tokens": 18752215.0, "reward": 1.0550609827041626, "reward_std": 0.28246423602104187, "rewards/accuracy_reward_step": 0.421875, "rewards/final_brier_reward_step": 0.5796191692352295, "rewards/format_reward_step": 0.9765625, "step": 86 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 7.154137164969154e-06, "aux_brier/mean_group_std": 0.07578767623087967, "aux_brier/mean_r": 0.9541173446474975, "aux_brier/n_active_tok": 227.125, "aux_brier/n_groups": 14.96875, "aux_brier/n_step_records": 56.78125, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.540683023872679, "calib/avg_num_step_conf": 7.453125, "calib/ece": 0.5616465863453816, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.001502652519893899, "calib/mean_conf": 0.02068273092369478, "calib/mu_c": 0.021310344827586206, "calib/mu_w": 0.019807692307692307, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.0, "calib/std_conf": 0.013530262932848557, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2245.0, "completions/max_terminated_length": 2245.0, "completions/mean_length": 398.46484375, "completions/mean_terminated_length": 404.7897033691406, "completions/min_length": 0.0, "completions/min_terminated_length": 88.0, "epoch": 0.0928, "grad_norm": 0.5288179516792297, "learning_rate": 3.138888888888889e-06, "loss": 0.0143, "num_tokens": 18959718.0, "reward": 1.1551870107650757, "reward_std": 0.286136269569397, "rewards/accuracy_reward_step": 0.56640625, "rewards/final_brier_reward_step": 0.42543554306030273, "rewards/format_reward_step": 0.96484375, "step": 87 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -3.120458337002585e-07, "aux_brier/mean_group_std": 0.055331605233758414, "aux_brier/mean_r": 0.9598075037022034, "aux_brier/n_active_tok": 240.25, "aux_brier/n_groups": 14.625, "aux_brier/n_step_records": 60.0625, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5096358403203604, "calib/avg_num_step_conf": 7.80078125, "calib/ece": 0.45916600790513834, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0018713552746840272, "calib/mean_conf": 0.023047430830039527, "calib/mu_c": 0.024016393442622956, "calib/mu_w": 0.02214503816793893, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.0158490197238091, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2910.0, "completions/max_terminated_length": 2910.0, "completions/mean_length": 448.9921875, "completions/mean_terminated_length": 450.7529602050781, "completions/min_length": 0.0, "completions/min_terminated_length": 153.0, "epoch": 0.09386666666666667, "grad_norm": 0.1636967957019806, "learning_rate": 3.1111111111111116e-06, "loss": 0.0467, "num_tokens": 19184508.0, "reward": 1.1021703481674194, "reward_std": 0.20122531056404114, "rewards/accuracy_reward_step": 0.4765625, "rewards/final_brier_reward_step": 0.5336815118789673, "rewards/format_reward_step": 0.984375, "step": 88 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 2.4262653590678696e-06, "aux_brier/mean_group_std": 0.07005360666593853, "aux_brier/mean_r": 0.9535259534192914, "aux_brier/n_active_tok": 230.75, "aux_brier/n_groups": 13.125, "aux_brier/n_step_records": 57.6875, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5213982660025825, "calib/avg_num_step_conf": 7.23046875, "calib/ece": 0.4325390625, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0005417204697780245, "calib/mean_conf": 0.0244921875, "calib/mu_c": 0.02478632478632479, "calib/mu_w": 0.024244604316546764, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.01429788888139937, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1462.0, "completions/max_terminated_length": 1462.0, "completions/mean_length": 445.78125, "completions/mean_terminated_length": 447.5294494628906, "completions/min_length": 0.0, "completions/min_terminated_length": 125.0, "epoch": 0.09493333333333333, "grad_norm": 0.061887916177511215, "learning_rate": 3.0833333333333336e-06, "loss": 0.0064, "num_tokens": 19407516.0, "reward": 1.0940849781036377, "reward_std": 0.17070043087005615, "rewards/accuracy_reward_step": 0.45703125, "rewards/final_brier_reward_step": 0.5638397932052612, "rewards/format_reward_step": 0.9921875, "step": 89 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 4.512320092034394e-06, "aux_brier/mean_group_std": 0.10073570940345282, "aux_brier/mean_r": 0.9266705959774814, "aux_brier/n_active_tok": 250.75, "aux_brier/n_groups": 14.53125, "aux_brier/n_step_records": 62.6875, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5735608345902463, "calib/avg_num_step_conf": 7.84375, "calib/ece": 0.5133201581027668, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.004210658622423331, "calib/mean_conf": 0.02422924901185771, "calib/mu_c": 0.026176470588235294, "calib/mu_w": 0.021965811965811963, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.015085357080634389, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2271.0, "completions/max_terminated_length": 2271.0, "completions/mean_length": 448.97265625, "completions/mean_terminated_length": 448.97265625, "completions/min_length": 100.0, "completions/min_terminated_length": 100.0, "epoch": 0.096, "grad_norm": 0.006909631192684174, "learning_rate": 3.055555555555556e-06, "loss": 0.0318, "num_tokens": 19625773.0, "reward": 1.1444084644317627, "reward_std": 0.22809094190597534, "rewards/accuracy_reward_step": 0.53125, "rewards/final_brier_reward_step": 0.48388397693634033, "rewards/format_reward_step": 0.984375, "step": 90 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -4.808361265706829e-06, "aux_brier/mean_group_std": 0.05787001523135353, "aux_brier/mean_r": 0.966620922965862, "aux_brier/n_active_tok": 244.625, "aux_brier/n_groups": 13.375, "aux_brier/n_step_records": 61.15625, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5891415283250436, "calib/avg_num_step_conf": 7.67578125, "calib/ece": 0.52256, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0033059879852722594, "calib/mean_conf": 0.025439999999999997, "calib/mu_c": 0.02693430656934306, "calib/mu_w": 0.0236283185840708, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.014338981832752283, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2203.0, "completions/max_terminated_length": 2203.0, "completions/mean_length": 448.3203125, "completions/mean_terminated_length": 450.0784606933594, "completions/min_length": 0.0, "completions/min_terminated_length": 160.0, "epoch": 0.09706666666666666, "grad_norm": 0.014485384337604046, "learning_rate": 3.0277777777777776e-06, "loss": 0.0269, "num_tokens": 19848255.0, "reward": 1.1486003398895264, "reward_std": 0.2162783443927765, "rewards/accuracy_reward_step": 0.54296875, "rewards/final_brier_reward_step": 0.4694015681743622, "rewards/format_reward_step": 0.9765625, "step": 91 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -8.149870861284825e-06, "aux_brier/mean_group_std": 0.027356117382574357, "aux_brier/mean_r": 0.9824328463670696, "aux_brier/n_active_tok": 229.0, "aux_brier/n_groups": 14.375, "aux_brier/n_step_records": 57.25, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5204615384615384, "calib/avg_num_step_conf": 7.33203125, "calib/ece": 0.46485098039215683, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.004266153846153847, "calib/mean_conf": 0.025345098039215686, "calib/mu_c": 0.02752, "calib/mu_w": 0.023253846153846153, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.03323143039604012, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1029.0, "completions/max_terminated_length": 1029.0, "completions/mean_length": 393.69140625, "completions/mean_terminated_length": 395.2353210449219, "completions/min_length": 0.0, "completions/min_terminated_length": 145.0, "epoch": 0.09813333333333334, "grad_norm": 0.025894714519381523, "learning_rate": 3e-06, "loss": -0.0019, "num_tokens": 20055760.0, "reward": 1.1168794631958008, "reward_std": 0.24395212531089783, "rewards/accuracy_reward_step": 0.48828125, "rewards/final_brier_reward_step": 0.5300179719924927, "rewards/format_reward_step": 0.9921875, "step": 92 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 3.4370384449644575e-06, "aux_brier/mean_group_std": 0.056055496449707885, "aux_brier/mean_r": 0.9675858516883075, "aux_brier/n_active_tok": 269.75, "aux_brier/n_groups": 18.03125, "aux_brier/n_step_records": 67.4375, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.47361735391831156, "calib/avg_num_step_conf": 8.45703125, "calib/ece": 0.4544233870967742, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0021065077193668236, "calib/mean_conf": 0.026221774193548392, "calib/mu_c": 0.02512605042016806, "calib/mu_w": 0.027232558139534884, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0004032258064516129, "calib/std_conf": 0.01307682054483867, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2995.0, "completions/max_terminated_length": 2995.0, "completions/mean_length": 489.22265625, "completions/mean_terminated_length": 491.1412048339844, "completions/min_length": 0.0, "completions/min_terminated_length": 132.0, "epoch": 0.0992, "grad_norm": 0.03542132303118706, "learning_rate": 2.9722222222222225e-06, "loss": 0.0628, "num_tokens": 20286777.0, "reward": 1.080827236175537, "reward_std": 0.26389071345329285, "rewards/accuracy_reward_step": 0.46484375, "rewards/final_brier_reward_step": 0.5264338850975037, "rewards/format_reward_step": 0.96875, "step": 93 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -2.616237748026551e-06, "aux_brier/mean_group_std": 0.06548742352883224, "aux_brier/mean_r": 0.9555905112583005, "aux_brier/n_active_tok": 228.75, "aux_brier/n_groups": 14.5, "aux_brier/n_step_records": 57.1875, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5670480120030008, "calib/avg_num_step_conf": 7.15625, "calib/ece": 0.48303952569169967, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0039129157289322335, "calib/mean_conf": 0.026841897233201583, "calib/mu_c": 0.02875968992248062, "calib/mu_w": 0.024846774193548388, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.014088606655990703, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2454.0, "completions/max_terminated_length": 2454.0, "completions/mean_length": 426.56640625, "completions/mean_terminated_length": 426.56640625, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 0.10026666666666667, "grad_norm": 0.006841793190687895, "learning_rate": 2.944444444444445e-06, "loss": 0.073, "num_tokens": 20504658.0, "reward": 1.12615966796875, "reward_std": 0.19835515320301056, "rewards/accuracy_reward_step": 0.50390625, "rewards/final_brier_reward_step": 0.512451171875, "rewards/format_reward_step": 0.98828125, "step": 94 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -1.7462224184139341e-06, "aux_brier/mean_group_std": 0.07639294718765116, "aux_brier/mean_r": 0.9508508828057519, "aux_brier/n_active_tok": 246.75, "aux_brier/n_groups": 14.5, "aux_brier/n_step_records": 61.6875, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.4827979409656611, "calib/avg_num_step_conf": 7.75, "calib/ece": 0.5620238095238095, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0010835994005343116, "calib/mean_conf": 0.03091269841269841, "calib/mu_c": 0.030469798657718115, "calib/mu_w": 0.03155339805825243, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0008333333333333333, "calib/std_conf": 0.019668894079380023, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2502.0, "completions/max_terminated_length": 2502.0, "completions/mean_length": 452.68359375, "completions/mean_terminated_length": 454.4588623046875, "completions/min_length": 0.0, "completions/min_terminated_length": 88.0, "epoch": 0.10133333333333333, "grad_norm": 0.009349067695438862, "learning_rate": 2.916666666666667e-06, "loss": 0.0141, "num_tokens": 20726673.0, "reward": 1.1833415031433105, "reward_std": 0.22875583171844482, "rewards/accuracy_reward_step": 0.58203125, "rewards/final_brier_reward_step": 0.4364910125732422, "rewards/format_reward_step": 0.984375, "step": 95 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 7.968594871093915e-06, "aux_brier/mean_group_std": 0.06872664148503413, "aux_brier/mean_r": 0.9523272980180933, "aux_brier/n_active_tok": 224.125, "aux_brier/n_groups": 12.125, "aux_brier/n_step_records": 56.03125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5447864945382324, "calib/avg_num_step_conf": 7.0546875, "calib/ece": 0.5976594488188977, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0019566037735849057, "calib/mean_conf": 0.0283248031496063, "calib/mu_c": 0.0290566037735849, "calib/mu_w": 0.027099999999999996, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.01253930101094713, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1992.0, "completions/max_terminated_length": 1992.0, "completions/mean_length": 412.9375, "completions/mean_terminated_length": 412.9375, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 0.1024, "grad_norm": 0.009241505526006222, "learning_rate": 2.888888888888889e-06, "loss": -0.0, "num_tokens": 20938201.0, "reward": 1.2167165279388428, "reward_std": 0.17452289164066315, "rewards/accuracy_reward_step": 0.62109375, "rewards/final_brier_reward_step": 0.40592920780181885, "rewards/format_reward_step": 0.98828125, "step": 96 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -4.801826086642258e-06, "aux_brier/mean_group_std": 0.04182444767990009, "aux_brier/mean_r": 0.9757240453047343, "aux_brier/n_active_tok": 249.125, "aux_brier/n_groups": 15.90625, "aux_brier/n_step_records": 62.28125, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.6404476670870114, "calib/avg_num_step_conf": 7.78515625, "calib/ece": 0.48678571428571427, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.005648171500630513, "calib/mean_conf": 0.029880952380952383, "calib/mu_c": 0.03261538461538461, "calib/mu_w": 0.026967213114754095, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0003968253968253968, "calib/std_conf": 0.013612615030987228, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2387.0, "completions/max_terminated_length": 2387.0, "completions/mean_length": 429.21484375, "completions/mean_terminated_length": 430.8980712890625, "completions/min_length": 0.0, "completions/min_terminated_length": 105.0, "epoch": 0.10346666666666667, "grad_norm": 0.14593623578548431, "learning_rate": 2.861111111111111e-06, "loss": 0.0241, "num_tokens": 21153152.0, "reward": 1.1242269277572632, "reward_std": 0.2638818025588989, "rewards/accuracy_reward_step": 0.5078125, "rewards/final_brier_reward_step": 0.5047203302383423, "rewards/format_reward_step": 0.98046875, "step": 97 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 4.996638026888434e-06, "aux_brier/mean_group_std": 0.08093589092060835, "aux_brier/mean_r": 0.9443325923255121, "aux_brier/n_active_tok": 254.25, "aux_brier/n_groups": 16.375, "aux_brier/n_step_records": 63.5625, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5582205029013539, "calib/avg_num_step_conf": 8.14453125, "calib/ece": 0.5277689243027888, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0025609284332688553, "calib/mean_conf": 0.03398406374501992, "calib/mu_c": 0.0351063829787234, "calib/mu_w": 0.032545454545454544, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.014059624917069644, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2711.0, "completions/max_terminated_length": 2711.0, "completions/mean_length": 456.859375, "completions/mean_terminated_length": 458.6510009765625, "completions/min_length": 0.0, "completions/min_terminated_length": 122.0, "epoch": 0.10453333333333334, "grad_norm": 0.03403199464082718, "learning_rate": 2.8333333333333335e-06, "loss": 0.0727, "num_tokens": 21376292.0, "reward": 1.1577739715576172, "reward_std": 0.2801635265350342, "rewards/accuracy_reward_step": 0.55078125, "rewards/final_brier_reward_step": 0.4670332074165344, "rewards/format_reward_step": 0.98046875, "step": 98 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 1.6811854322007491e-06, "aux_brier/mean_group_std": 0.03440397797157451, "aux_brier/mean_r": 0.9778653646278308, "aux_brier/n_active_tok": 267.25, "aux_brier/n_groups": 17.375, "aux_brier/n_step_records": 66.8125, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4395824239778845, "calib/avg_num_step_conf": 8.37109375, "calib/ece": 0.27311857707509885, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0022383966244725675, "calib/mean_conf": 0.039134387351778654, "calib/mu_c": 0.037594936708860764, "calib/mu_w": 0.03983333333333333, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.017172851866403924, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2594.0, "completions/max_terminated_length": 2594.0, "completions/mean_length": 506.23046875, "completions/mean_terminated_length": 506.23046875, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 0.1056, "grad_norm": 0.009494793601334095, "learning_rate": 2.805555555555556e-06, "loss": 0.0316, "num_tokens": 21611687.0, "reward": 0.9780057668685913, "reward_std": 0.2294953614473343, "rewards/accuracy_reward_step": 0.30859375, "rewards/final_brier_reward_step": 0.7010855674743652, "rewards/format_reward_step": 0.98828125, "step": 99 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 4.011528835834444e-06, "aux_brier/mean_group_std": 0.08616607958938509, "aux_brier/mean_r": 0.9386765459259507, "aux_brier/n_active_tok": 257.5, "aux_brier/n_groups": 15.78125, "aux_brier/n_step_records": 64.375, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5979155944415853, "calib/avg_num_step_conf": 8.171875, "calib/ece": 0.42842, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.005672928461142562, "calib/mean_conf": 0.03558, "calib/mu_c": 0.03862068965517241, "calib/mu_w": 0.03294776119402985, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.014716100026841351, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2438.0, "completions/max_terminated_length": 2438.0, "completions/mean_length": 461.5, "completions/mean_terminated_length": 463.3098449707031, "completions/min_length": 0.0, "completions/min_terminated_length": 110.0, "epoch": 0.10666666666666667, "grad_norm": 0.05984112247824669, "learning_rate": 2.7777777777777783e-06, "loss": 0.0861, "num_tokens": 21837239.0, "reward": 1.0806536674499512, "reward_std": 0.26517045497894287, "rewards/accuracy_reward_step": 0.453125, "rewards/final_brier_reward_step": 0.5569897294044495, "rewards/format_reward_step": 0.9765625, "step": 100 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -1.2493385881939822e-06, "aux_brier/mean_group_std": 0.041907449987532125, "aux_brier/mean_r": 0.9751555650940253, "aux_brier/n_active_tok": 276.5, "aux_brier/n_groups": 16.40625, "aux_brier/n_step_records": 69.125, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5376787495843033, "calib/avg_num_step_conf": 8.640625, "calib/ece": 0.3486904761904763, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.002274027269704028, "calib/mean_conf": 0.036230158730158724, "calib/mu_c": 0.03762886597938145, "calib/mu_w": 0.03535483870967742, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.016513420563437593, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2925.0, "completions/max_terminated_length": 2925.0, "completions/mean_length": 490.14453125, "completions/mean_terminated_length": 490.14453125, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 0.10773333333333333, "grad_norm": 0.011581323109567165, "learning_rate": 2.7500000000000004e-06, "loss": 0.0924, "num_tokens": 22069708.0, "reward": 1.0301841497421265, "reward_std": 0.23364655673503876, "rewards/accuracy_reward_step": 0.3828125, "rewards/final_brier_reward_step": 0.6285492181777954, "rewards/format_reward_step": 0.98046875, "step": 101 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -7.214122758822383e-07, "aux_brier/mean_group_std": 0.06669447807464005, "aux_brier/mean_r": 0.9520474488679199, "aux_brier/n_active_tok": 234.375, "aux_brier/n_groups": 13.59375, "aux_brier/n_step_records": 58.59375, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5959164292497625, "calib/avg_num_step_conf": 7.38671875, "calib/ece": 0.6086205533596838, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0045531135531135455, "calib/mean_conf": 0.03169565217391304, "calib/mu_c": 0.03333333333333333, "calib/mu_w": 0.028780219780219787, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.01405150959177948, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2497.0, "completions/max_terminated_length": 2497.0, "completions/mean_length": 397.5703125, "completions/mean_terminated_length": 399.1294250488281, "completions/min_length": 0.0, "completions/min_terminated_length": 72.0, "epoch": 0.1088, "grad_norm": 0.00953032448887825, "learning_rate": 2.7222222222222224e-06, "loss": -0.0035, "num_tokens": 22278182.0, "reward": 1.2299764156341553, "reward_std": 0.15418517589569092, "rewards/accuracy_reward_step": 0.63671875, "rewards/final_brier_reward_step": 0.39646828174591064, "rewards/format_reward_step": 0.98828125, "step": 102 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 5.623730196635535e-06, "aux_brier/mean_group_std": 0.06268301511030247, "aux_brier/mean_r": 0.962447104480166, "aux_brier/n_active_tok": 266.0, "aux_brier/n_groups": 17.375, "aux_brier/n_step_records": 66.5, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5176478083588175, "calib/avg_num_step_conf": 8.3359375, "calib/ece": 0.5344545454545454, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0007089067278287509, "calib/mean_conf": 0.034715415019762844, "calib/mu_c": 0.035020833333333334, "calib/mu_w": 0.034311926605504584, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.014682342636720714, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2412.0, "completions/max_terminated_length": 2412.0, "completions/mean_length": 501.39453125, "completions/mean_terminated_length": 503.3608093261719, "completions/min_length": 0.0, "completions/min_terminated_length": 124.0, "epoch": 0.10986666666666667, "grad_norm": 0.06882353872060776, "learning_rate": 2.6944444444444444e-06, "loss": -0.0031, "num_tokens": 22511091.0, "reward": 1.1725845336914062, "reward_std": 0.20618131756782532, "rewards/accuracy_reward_step": 0.5625, "rewards/final_brier_reward_step": 0.4637756049633026, "rewards/format_reward_step": 0.98828125, "step": 103 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -1.1690783303652807e-05, "aux_brier/mean_group_std": 0.05171970853851588, "aux_brier/mean_r": 0.9686342693300826, "aux_brier/n_active_tok": 247.25, "aux_brier/n_groups": 14.625, "aux_brier/n_step_records": 61.8125, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5647796352583586, "calib/avg_num_step_conf": 7.95703125, "calib/ece": 0.4087747035573123, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0021105623100303936, "calib/mean_conf": 0.03391304347826087, "calib/mu_c": 0.03508928571428572, "calib/mu_w": 0.032978723404255325, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.014367575340746352, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2381.0, "completions/max_terminated_length": 2381.0, "completions/mean_length": 449.234375, "completions/mean_terminated_length": 452.7716369628906, "completions/min_length": 0.0, "completions/min_terminated_length": 159.0, "epoch": 0.11093333333333333, "grad_norm": 0.07084021717309952, "learning_rate": 2.666666666666667e-06, "loss": 0.0049, "num_tokens": 22732775.0, "reward": 1.0746469497680664, "reward_std": 0.2366853654384613, "rewards/accuracy_reward_step": 0.4375, "rewards/final_brier_reward_step": 0.5798375010490417, "rewards/format_reward_step": 0.984375, "step": 104 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 4.3962614099113395e-07, "aux_brier/mean_group_std": 0.03452205577012924, "aux_brier/mean_r": 0.97974184310751, "aux_brier/n_active_tok": 255.875, "aux_brier/n_groups": 17.125, "aux_brier/n_step_records": 63.96875, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.42881355932203397, "calib/avg_num_step_conf": 8.06640625, "calib/ece": 0.432298814229249, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.004502272441933471, "calib/mean_conf": 0.03655494071146245, "calib/mu_c": 0.03415254237288135, "calib/mu_w": 0.03865481481481482, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0012252964426877472, "calib/std_conf": 0.015366233491518097, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2626.0, "completions/max_terminated_length": 2626.0, "completions/mean_length": 461.1796875, "completions/mean_terminated_length": 461.1796875, "completions/min_length": 172.0, "completions/min_terminated_length": 172.0, "epoch": 0.112, "grad_norm": 0.009009039960801601, "learning_rate": 2.6388888888888893e-06, "loss": 0.0727, "num_tokens": 22956597.0, "reward": 1.0923669338226318, "reward_std": 0.24024072289466858, "rewards/accuracy_reward_step": 0.4609375, "rewards/final_brier_reward_step": 0.5569679141044617, "rewards/format_reward_step": 0.984375, "step": 105 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -8.847564145791909e-06, "aux_brier/mean_group_std": 0.06365965600698384, "aux_brier/mean_r": 0.958427444866737, "aux_brier/n_active_tok": 241.75, "aux_brier/n_groups": 13.59375, "aux_brier/n_step_records": 60.4375, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5625705329153605, "calib/avg_num_step_conf": 7.5703125, "calib/ece": 0.3918156862745098, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.003337304075235116, "calib/mean_conf": 0.03955686274509804, "calib/mu_c": 0.04145454545454545, "calib/mu_w": 0.038117241379310336, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.013752279738311352, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2198.0, "completions/max_terminated_length": 2198.0, "completions/mean_length": 438.31640625, "completions/mean_terminated_length": 438.31640625, "completions/min_length": 163.0, "completions/min_terminated_length": 163.0, "epoch": 0.11306666666666666, "grad_norm": 0.007643554359674454, "learning_rate": 2.6111111111111113e-06, "loss": 0.0252, "num_tokens": 23173390.0, "reward": 1.0748772621154785, "reward_std": 0.22851277887821198, "rewards/accuracy_reward_step": 0.4296875, "rewards/final_brier_reward_step": 0.5963842272758484, "rewards/format_reward_step": 0.9921875, "step": 106 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 4.34117809899881e-06, "aux_brier/mean_group_std": 0.05989411908267228, "aux_brier/mean_r": 0.9628471575134833, "aux_brier/n_active_tok": 251.375, "aux_brier/n_groups": 14.5625, "aux_brier/n_step_records": 62.84375, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6133786141575275, "calib/avg_num_step_conf": 7.921875, "calib/ece": 0.495984251968504, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.005618145563310056, "calib/mean_conf": 0.039448818897637794, "calib/mu_c": 0.04205882352941176, "calib/mu_w": 0.0364406779661017, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.014644720992074445, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2444.0, "completions/max_terminated_length": 2444.0, "completions/mean_length": 449.125, "completions/mean_terminated_length": 449.125, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 0.11413333333333334, "grad_norm": 0.0080488296225667, "learning_rate": 2.5833333333333337e-06, "loss": 0.0252, "num_tokens": 23392982.0, "reward": 1.1503819227218628, "reward_std": 0.25812166929244995, "rewards/accuracy_reward_step": 0.53125, "rewards/final_brier_reward_step": 0.4999653995037079, "rewards/format_reward_step": 0.98828125, "step": 107 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 1.11153050233459e-06, "aux_brier/mean_group_std": 0.0811696201758042, "aux_brier/mean_r": 0.9475855806344488, "aux_brier/n_active_tok": 267.75, "aux_brier/n_groups": 15.21875, "aux_brier/n_step_records": 66.9375, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6236570247933885, "calib/avg_num_step_conf": 8.39453125, "calib/ece": 0.6093517786561264, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.007393939393939397, "calib/mean_conf": 0.04282213438735178, "calib/mu_c": 0.045393939393939396, "calib/mu_w": 0.038, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.017201548331752234, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1900.0, "completions/max_terminated_length": 1900.0, "completions/mean_length": 477.796875, "completions/mean_terminated_length": 477.796875, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 0.1152, "grad_norm": 0.0073010846972465515, "learning_rate": 2.5555555555555557e-06, "loss": 0.0032, "num_tokens": 23618530.0, "reward": 1.2426183223724365, "reward_std": 0.2211112380027771, "rewards/accuracy_reward_step": 0.6484375, "rewards/final_brier_reward_step": 0.4001609683036804, "rewards/format_reward_step": 0.98828125, "step": 108 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -3.1203846021510273e-06, "aux_brier/mean_group_std": 0.04565737504777914, "aux_brier/mean_r": 0.9744196508989337, "aux_brier/n_active_tok": 271.375, "aux_brier/n_groups": 15.3125, "aux_brier/n_step_records": 67.84375, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5828637615827699, "calib/avg_num_step_conf": 8.79296875, "calib/ece": 0.4364268774703557, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0036446280991735552, "calib/mean_conf": 0.04183399209486167, "calib/mu_c": 0.04373553719008264, "calib/mu_w": 0.04009090909090909, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.016523671804807218, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2665.0, "completions/max_terminated_length": 2665.0, "completions/mean_length": 469.89453125, "completions/mean_terminated_length": 473.594482421875, "completions/min_length": 0.0, "completions/min_terminated_length": 136.0, "epoch": 0.11626666666666667, "grad_norm": 0.2555139362812042, "learning_rate": 2.5277777777777778e-06, "loss": -0.0407, "num_tokens": 23843423.0, "reward": 1.1055392026901245, "reward_std": 0.1596943885087967, "rewards/accuracy_reward_step": 0.47265625, "rewards/final_brier_reward_step": 0.554969310760498, "rewards/format_reward_step": 0.98828125, "step": 109 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -4.658392951106816e-06, "aux_brier/mean_group_std": 0.053848758224179144, "aux_brier/mean_r": 0.9650792494076447, "aux_brier/n_active_tok": 223.625, "aux_brier/n_groups": 12.75, "aux_brier/n_step_records": 55.90625, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.43342462180490354, "calib/avg_num_step_conf": 7.4140625, "calib/ece": 0.394044, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.004108763693270727, "calib/mean_conf": 0.039555999999999994, "calib/mu_c": 0.037222222222222226, "calib/mu_w": 0.04133098591549295, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0008, "calib/std_conf": 0.013373363974707337, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 3053.0, "completions/max_terminated_length": 3053.0, "completions/mean_length": 449.1015625, "completions/mean_terminated_length": 452.6377868652344, "completions/min_length": 0.0, "completions/min_terminated_length": 114.0, "epoch": 0.11733333333333333, "grad_norm": 0.16124384105205536, "learning_rate": 2.5e-06, "loss": 0.0469, "num_tokens": 24063313.0, "reward": 1.051352620124817, "reward_std": 0.29947662353515625, "rewards/accuracy_reward_step": 0.421875, "rewards/final_brier_reward_step": 0.5804105997085571, "rewards/format_reward_step": 0.96875, "step": 110 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -4.069663242178834e-06, "aux_brier/mean_group_std": 0.05618775918637486, "aux_brier/mean_r": 0.9636342183906583, "aux_brier/n_active_tok": 264.5, "aux_brier/n_groups": 17.9375, "aux_brier/n_step_records": 66.125, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.560864972348226, "calib/avg_num_step_conf": 8.29296875, "calib/ece": 0.4334409448818898, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.003937007874015748, "calib/gap": 0.010332069844031573, "calib/mean_conf": 0.04293700787401575, "calib/mu_c": 0.048347107438016526, "calib/mu_w": 0.03801503759398495, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.06149799920328963, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2971.0, "completions/max_terminated_length": 2971.0, "completions/mean_length": 486.4765625, "completions/mean_terminated_length": 486.4765625, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 0.1184, "grad_norm": 0.008153988048434258, "learning_rate": 2.4722222222222226e-06, "loss": 0.018, "num_tokens": 24295259.0, "reward": 1.102805495262146, "reward_std": 0.2584152817726135, "rewards/accuracy_reward_step": 0.47265625, "rewards/final_brier_reward_step": 0.5518468618392944, "rewards/format_reward_step": 0.984375, "step": 111 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -3.3048599009655177e-06, "aux_brier/mean_group_std": 0.10069022026117411, "aux_brier/mean_r": 0.9255774714505626, "aux_brier/n_active_tok": 234.625, "aux_brier/n_groups": 14.0625, "aux_brier/n_step_records": 58.65625, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.6405714285714286, "calib/avg_num_step_conf": 7.46484375, "calib/ece": 0.4636565254098406, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.005920413667698403, "calib/mean_conf": 0.03921200048657371, "calib/mu_c": 0.042160413667698404, "calib/mu_w": 0.03624, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.00043824701195219125, "calib/std_conf": 0.014251539851051709, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2529.0, "completions/max_terminated_length": 2529.0, "completions/mean_length": 516.875, "completions/mean_terminated_length": 516.875, "completions/min_length": 176.0, "completions/min_terminated_length": 176.0, "epoch": 0.11946666666666667, "grad_norm": 0.008149411529302597, "learning_rate": 2.4444444444444447e-06, "loss": 0.0401, "num_tokens": 24535499.0, "reward": 1.1104958057403564, "reward_std": 0.2235538810491562, "rewards/accuracy_reward_step": 0.4921875, "rewards/final_brier_reward_step": 0.5279207229614258, "rewards/format_reward_step": 0.97265625, "step": 112 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 1.7379988755950038e-06, "aux_brier/mean_group_std": 0.07673422997697633, "aux_brier/mean_r": 0.9488373491836689, "aux_brier/n_active_tok": 235.0, "aux_brier/n_groups": 14.34375, "aux_brier/n_step_records": 58.75, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5268595041322314, "calib/avg_num_step_conf": 7.34375, "calib/ece": 0.49160784313725486, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.007038361909460972, "calib/mean_conf": 0.04423529411764706, "calib/mu_c": 0.040895522388059706, "calib/mu_w": 0.04793388429752068, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0051764705882352945, "calib/std_conf": 0.047404222627870515, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2072.0, "completions/max_terminated_length": 2072.0, "completions/mean_length": 435.609375, "completions/mean_terminated_length": 437.31768798828125, "completions/min_length": 0.0, "completions/min_terminated_length": 140.0, "epoch": 0.12053333333333334, "grad_norm": 0.009362783282995224, "learning_rate": 2.4166666666666667e-06, "loss": 0.0282, "num_tokens": 24752215.0, "reward": 1.1440508365631104, "reward_std": 0.27415621280670166, "rewards/accuracy_reward_step": 0.5234375, "rewards/final_brier_reward_step": 0.505890965461731, "rewards/format_reward_step": 0.98828125, "step": 113 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -5.150451800051137e-06, "aux_brier/mean_group_std": 0.05664586972073767, "aux_brier/mean_r": 0.9660372742649933, "aux_brier/n_active_tok": 231.875, "aux_brier/n_groups": 13.3125, "aux_brier/n_step_records": 57.96875, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5644085753458806, "calib/avg_num_step_conf": 7.390625, "calib/ece": 0.5190745098039216, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0025438115418172716, "calib/mean_conf": 0.03778823529411765, "calib/mu_c": 0.038915492957746475, "calib/mu_w": 0.036371681415929204, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.01065843396249524, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1483.0, "completions/max_terminated_length": 1483.0, "completions/mean_length": 420.94921875, "completions/mean_terminated_length": 422.60003662109375, "completions/min_length": 0.0, "completions/min_terminated_length": 140.0, "epoch": 0.1216, "grad_norm": 0.17683005332946777, "learning_rate": 2.388888888888889e-06, "loss": 0.0022, "num_tokens": 24965002.0, "reward": 1.1705669164657593, "reward_std": 0.23685584962368011, "rewards/accuracy_reward_step": 0.5546875, "rewards/final_brier_reward_step": 0.47914260625839233, "rewards/format_reward_step": 0.9921875, "step": 114 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -7.781708822873767e-07, "aux_brier/mean_group_std": 0.05817280359278826, "aux_brier/mean_r": 0.9644236579121487, "aux_brier/n_active_tok": 226.125, "aux_brier/n_groups": 11.75, "aux_brier/n_step_records": 56.53125, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5555972013993004, "calib/avg_num_step_conf": 7.11328125, "calib/ece": 0.5100669291338583, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.003937007874015748, "calib/gap": -0.006791229385307358, "calib/mean_conf": 0.04331889763779528, "calib/mu_c": 0.04021739130434782, "calib/mu_w": 0.04700862068965518, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.005039370078740157, "calib/std_conf": 0.06247176273279215, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 948.0, "completions/max_terminated_length": 948.0, "completions/mean_length": 418.890625, "completions/mean_terminated_length": 420.5333557128906, "completions/min_length": 0.0, "completions/min_terminated_length": 126.0, "epoch": 0.12266666666666666, "grad_norm": 0.3346615731716156, "learning_rate": 2.361111111111111e-06, "loss": 0.007, "num_tokens": 25177502.0, "reward": 1.1499089002609253, "reward_std": 0.2871477007865906, "rewards/accuracy_reward_step": 0.54296875, "rewards/final_brier_reward_step": 0.48244792222976685, "rewards/format_reward_step": 0.97265625, "step": 115 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -5.519956423893913e-07, "aux_brier/mean_group_std": 0.03897763365044267, "aux_brier/mean_r": 0.9761643599604232, "aux_brier/n_active_tok": 254.5, "aux_brier/n_groups": 16.4375, "aux_brier/n_step_records": 63.625, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5380801140782991, "calib/avg_num_step_conf": 8.26171875, "calib/ece": 0.4993574297188755, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00061317085817994, "calib/mean_conf": 0.03670682730923695, "calib/mu_c": 0.03699248120300752, "calib/mu_w": 0.03637931034482758, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0009638554216867468, "calib/std_conf": 0.017596503668048828, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2504.0, "completions/max_terminated_length": 2504.0, "completions/mean_length": 499.73046875, "completions/mean_terminated_length": 503.66534423828125, "completions/min_length": 0.0, "completions/min_terminated_length": 103.0, "epoch": 0.12373333333333333, "grad_norm": 0.0661558285355568, "learning_rate": 2.3333333333333336e-06, "loss": 0.0408, "num_tokens": 25409953.0, "reward": 1.1254181861877441, "reward_std": 0.27198296785354614, "rewards/accuracy_reward_step": 0.51953125, "rewards/final_brier_reward_step": 0.48604804277420044, "rewards/format_reward_step": 0.96875, "step": 116 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 9.08533166477099e-06, "aux_brier/mean_group_std": 0.047897951509321336, "aux_brier/mean_r": 0.9690990044847806, "aux_brier/n_active_tok": 259.5, "aux_brier/n_groups": 16.8125, "aux_brier/n_step_records": 64.875, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5697837150127226, "calib/avg_num_step_conf": 8.11328125, "calib/ece": 0.4408764940239044, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.002727099236641238, "calib/mean_conf": 0.03832669322709164, "calib/mu_c": 0.039750000000000014, "calib/mu_w": 0.037022900763358776, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0005577689243027889, "calib/std_conf": 0.028333332555256656, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2331.0, "completions/max_terminated_length": 2331.0, "completions/mean_length": 491.10546875, "completions/mean_terminated_length": 491.10546875, "completions/min_length": 174.0, "completions/min_terminated_length": 174.0, "epoch": 0.1248, "grad_norm": 0.007189909461885691, "learning_rate": 2.305555555555556e-06, "loss": 0.0419, "num_tokens": 25642276.0, "reward": 1.0869762897491455, "reward_std": 0.2557705044746399, "rewards/accuracy_reward_step": 0.46875, "rewards/final_brier_reward_step": 0.5354054570198059, "rewards/format_reward_step": 0.96875, "step": 117 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 2.610709073314066e-06, "aux_brier/mean_group_std": 0.046083838386966454, "aux_brier/mean_r": 0.9714296156016924, "aux_brier/n_active_tok": 237.125, "aux_brier/n_groups": 14.28125, "aux_brier/n_step_records": 59.28125, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5148148148148148, "calib/avg_num_step_conf": 7.671875, "calib/ece": 0.5082208835341365, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00016257309941521136, "calib/mean_conf": 0.03394779116465863, "calib/mu_c": 0.034022222222222225, "calib/mu_w": 0.03385964912280701, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0, "calib/std_conf": 0.010271088851985645, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1521.0, "completions/max_terminated_length": 1521.0, "completions/mean_length": 442.80078125, "completions/mean_terminated_length": 449.8293762207031, "completions/min_length": 0.0, "completions/min_terminated_length": 150.0, "epoch": 0.12586666666666665, "grad_norm": 0.0722954198718071, "learning_rate": 2.277777777777778e-06, "loss": -0.0293, "num_tokens": 25859641.0, "reward": 1.133664846420288, "reward_std": 0.23743316531181335, "rewards/accuracy_reward_step": 0.52734375, "rewards/final_brier_reward_step": 0.4799717664718628, "rewards/format_reward_step": 0.97265625, "step": 118 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -1.457912561966812e-06, "aux_brier/mean_group_std": 0.06911768021232345, "aux_brier/mean_r": 0.9609109672958739, "aux_brier/n_active_tok": 262.875, "aux_brier/n_groups": 17.40625, "aux_brier/n_step_records": 65.71875, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5227099236641222, "calib/avg_num_step_conf": 8.21875, "calib/ece": 0.44505976095617533, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0010642493638676892, "calib/mean_conf": 0.033027888446215146, "calib/mu_c": 0.03358333333333333, "calib/mu_w": 0.032519083969465644, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.010197024169621382, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2926.0, "completions/max_terminated_length": 2926.0, "completions/mean_length": 522.71875, "completions/mean_terminated_length": 522.71875, "completions/min_length": 102.0, "completions/min_terminated_length": 102.0, "epoch": 0.12693333333333334, "grad_norm": 0.006843086332082748, "learning_rate": 2.25e-06, "loss": 0.0582, "num_tokens": 26098521.0, "reward": 1.0915626287460327, "reward_std": 0.23342382907867432, "rewards/accuracy_reward_step": 0.46875, "rewards/final_brier_reward_step": 0.53812575340271, "rewards/format_reward_step": 0.9765625, "step": 119 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 1.5438382932053685e-06, "aux_brier/mean_group_std": 0.07403760023497923, "aux_brier/mean_r": 0.9592139927628702, "aux_brier/n_active_tok": 218.875, "aux_brier/n_groups": 11.90625, "aux_brier/n_step_records": 54.71875, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5660766400203019, "calib/avg_num_step_conf": 6.8515625, "calib/ece": 0.529300395256917, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.001622002283974116, "calib/mean_conf": 0.031964426877470364, "calib/mu_c": 0.03267605633802817, "calib/mu_w": 0.031054054054054054, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.011632155215101388, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2531.0, "completions/max_terminated_length": 2531.0, "completions/mean_length": 430.33203125, "completions/mean_terminated_length": 430.33203125, "completions/min_length": 170.0, "completions/min_terminated_length": 170.0, "epoch": 0.128, "grad_norm": 0.008278531953692436, "learning_rate": 2.222222222222222e-06, "loss": 0.0438, "num_tokens": 26315374.0, "reward": 1.1601471900939941, "reward_std": 0.26189303398132324, "rewards/accuracy_reward_step": 0.5546875, "rewards/final_brier_reward_step": 0.46090108156204224, "rewards/format_reward_step": 0.98046875, "step": 120 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 1.3198501480626845e-06, "aux_brier/mean_group_std": 0.05460861669709895, "aux_brier/mean_r": 0.9674395889680963, "aux_brier/n_active_tok": 249.625, "aux_brier/n_groups": 14.125, "aux_brier/n_step_records": 62.40625, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5341512775107513, "calib/avg_num_step_conf": 8.28125, "calib/ece": 0.4364126984126984, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0021791044776119373, "calib/mean_conf": 0.031841269841269844, "calib/mu_c": 0.032999999999999995, "calib/mu_w": 0.030820895522388057, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.011353212746061988, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2538.0, "completions/max_terminated_length": 2538.0, "completions/mean_length": 478.0234375, "completions/mean_terminated_length": 483.69171142578125, "completions/min_length": 0.0, "completions/min_terminated_length": 166.0, "epoch": 0.12906666666666666, "grad_norm": 0.0893382579088211, "learning_rate": 2.1944444444444445e-06, "loss": -0.0206, "num_tokens": 26542804.0, "reward": 1.09130859375, "reward_std": 0.27117180824279785, "rewards/accuracy_reward_step": 0.4609375, "rewards/final_brier_reward_step": 0.5527344942092896, "rewards/format_reward_step": 0.984375, "step": 121 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 1.561933296034823e-07, "aux_brier/mean_group_std": 0.06950377476941044, "aux_brier/mean_r": 0.9614083428039205, "aux_brier/n_active_tok": 228.25, "aux_brier/n_groups": 11.75, "aux_brier/n_step_records": 57.0625, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.6495856074388519, "calib/avg_num_step_conf": 7.16015625, "calib/ece": 0.580296, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.004283538845091306, "calib/mean_conf": 0.031704, "calib/mu_c": 0.03336601307189543, "calib/mu_w": 0.02908247422680412, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.010392323320605456, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2416.0, "completions/max_terminated_length": 2416.0, "completions/mean_length": 439.09375, "completions/mean_terminated_length": 442.5511779785156, "completions/min_length": 0.0, "completions/min_terminated_length": 121.0, "epoch": 0.13013333333333332, "grad_norm": 0.089215949177742, "learning_rate": 2.166666666666667e-06, "loss": 0.0221, "num_tokens": 26762556.0, "reward": 1.1824750900268555, "reward_std": 0.22523635625839233, "rewards/accuracy_reward_step": 0.59765625, "rewards/final_brier_reward_step": 0.4095883369445801, "rewards/format_reward_step": 0.96484375, "step": 122 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 6.48899481078824e-06, "aux_brier/mean_group_std": 0.07317168488485834, "aux_brier/mean_r": 0.9473232174387858, "aux_brier/n_active_tok": 249.375, "aux_brier/n_groups": 14.3125, "aux_brier/n_step_records": 62.34375, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4970003749531309, "calib/avg_num_step_conf": 8.23828125, "calib/ece": 0.46628458498023717, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00017185351831021378, "calib/mean_conf": 0.03173913043478261, "calib/mu_c": 0.031825396825396826, "calib/mu_w": 0.03165354330708661, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.00945862272785462, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 1597.0, "completions/max_terminated_length": 1597.0, "completions/mean_length": 479.8125, "completions/mean_terminated_length": 485.5019836425781, "completions/min_length": 0.0, "completions/min_terminated_length": 135.0, "epoch": 0.1312, "grad_norm": 0.25065913796424866, "learning_rate": 2.138888888888889e-06, "loss": -0.0364, "num_tokens": 26990676.0, "reward": 1.1179126501083374, "reward_std": 0.2727106213569641, "rewards/accuracy_reward_step": 0.4921875, "rewards/final_brier_reward_step": 0.5263378620147705, "rewards/format_reward_step": 0.98828125, "step": 123 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 1.8334267029018747e-06, "aux_brier/mean_group_std": 0.053535013414634715, "aux_brier/mean_r": 0.963970846064989, "aux_brier/n_active_tok": 228.125, "aux_brier/n_groups": 12.78125, "aux_brier/n_step_records": 57.03125, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4924968866749688, "calib/avg_num_step_conf": 7.22265625, "calib/ece": 0.5387265625, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00018405977584059346, "calib/mean_conf": 0.0315859375, "calib/mu_c": 0.03150684931506849, "calib/mu_w": 0.03169090909090908, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.009656626079852825, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1428.0, "completions/max_terminated_length": 1428.0, "completions/mean_length": 443.515625, "completions/mean_terminated_length": 445.2549133300781, "completions/min_length": 0.0, "completions/min_terminated_length": 161.0, "epoch": 0.13226666666666667, "grad_norm": 0.09617814421653748, "learning_rate": 2.1111111111111114e-06, "loss": 0.0235, "num_tokens": 27211032.0, "reward": 1.186445951461792, "reward_std": 0.13561290502548218, "rewards/accuracy_reward_step": 0.5703125, "rewards/final_brier_reward_step": 0.46453407406806946, "rewards/format_reward_step": 1.0, "step": 124 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -2.3602710374770464e-06, "aux_brier/mean_group_std": 0.06598502881847493, "aux_brier/mean_r": 0.9600569149164065, "aux_brier/n_active_tok": 233.875, "aux_brier/n_groups": 16.34375, "aux_brier/n_step_records": 58.46875, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.5076048186284406, "calib/avg_num_step_conf": 7.5390625, "calib/ece": 0.4482456557377049, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00013171411265899824, "calib/mean_conf": 0.0320822131147541, "calib/mu_c": 0.032150769230769226, "calib/mu_w": 0.03201905511811023, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.00040983606557377055, "calib/std_conf": 0.011573074071428855, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2710.0, "completions/max_terminated_length": 2710.0, "completions/mean_length": 487.9296875, "completions/mean_terminated_length": 489.8431701660156, "completions/min_length": 0.0, "completions/min_terminated_length": 155.0, "epoch": 0.13333333333333333, "grad_norm": 0.09179168194532394, "learning_rate": 2.0833333333333334e-06, "loss": 0.0881, "num_tokens": 27440750.0, "reward": 1.0617573261260986, "reward_std": 0.3050077557563782, "rewards/accuracy_reward_step": 0.45703125, "rewards/final_brier_reward_step": 0.5204670429229736, "rewards/format_reward_step": 0.94921875, "step": 125 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -5.091867457229782e-06, "aux_brier/mean_group_std": 0.07556235001658002, "aux_brier/mean_r": 0.9444273987982837, "aux_brier/n_active_tok": 233.625, "aux_brier/n_groups": 12.8125, "aux_brier/n_step_records": 58.40625, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5751915708812261, "calib/avg_num_step_conf": 7.69921875, "calib/ece": 0.5035936254980079, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0028943167305236286, "calib/mean_conf": 0.03425498007968127, "calib/mu_c": 0.03559259259259259, "calib/mu_w": 0.032698275862068964, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.012785337916420253, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2462.0, "completions/max_terminated_length": 2462.0, "completions/mean_length": 461.37109375, "completions/mean_terminated_length": 465.0039367675781, "completions/min_length": 0.0, "completions/min_terminated_length": 154.0, "epoch": 0.1344, "grad_norm": 0.03541954979300499, "learning_rate": 2.0555555555555555e-06, "loss": -0.0137, "num_tokens": 27664325.0, "reward": 1.1399164199829102, "reward_std": 0.23595809936523438, "rewards/accuracy_reward_step": 0.52734375, "rewards/final_brier_reward_step": 0.4893532991409302, "rewards/format_reward_step": 0.98046875, "step": 126 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -1.4350087719217086e-06, "aux_brier/mean_group_std": 0.04903680719944231, "aux_brier/mean_r": 0.9662162115789407, "aux_brier/n_active_tok": 259.75, "aux_brier/n_groups": 17.3125, "aux_brier/n_step_records": 64.9375, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5781046360081922, "calib/avg_num_step_conf": 8.1484375, "calib/ece": 0.482207874015748, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0029358158009060997, "calib/mean_conf": 0.03354015748031496, "calib/mu_c": 0.034961832061068704, "calib/mu_w": 0.032026016260162604, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.00994752578617108, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2371.0, "completions/max_terminated_length": 2371.0, "completions/mean_length": 456.46484375, "completions/mean_terminated_length": 456.46484375, "completions/min_length": 168.0, "completions/min_terminated_length": 168.0, "epoch": 0.13546666666666668, "grad_norm": 0.007647297345101833, "learning_rate": 2.027777777777778e-06, "loss": 0.04, "num_tokens": 27884852.0, "reward": 1.1365714073181152, "reward_std": 0.23692110180854797, "rewards/accuracy_reward_step": 0.51171875, "rewards/final_brier_reward_step": 0.5150356292724609, "rewards/format_reward_step": 0.9921875, "step": 127 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -2.3018596861518192e-06, "aux_brier/mean_group_std": 0.041965678466232094, "aux_brier/mean_r": 0.9697844503544641, "aux_brier/n_active_tok": 223.25, "aux_brier/n_groups": 14.75, "aux_brier/n_step_records": 55.8125, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.6206041625888954, "calib/avg_num_step_conf": 7.02734375, "calib/ece": 0.4380483870967742, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.004920206172114583, "calib/mean_conf": 0.033725806451612904, "calib/mu_c": 0.036324786324786335, "calib/mu_w": 0.03140458015267175, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.012262806807629674, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2567.0, "completions/max_terminated_length": 2567.0, "completions/mean_length": 450.3203125, "completions/mean_terminated_length": 455.66009521484375, "completions/min_length": 0.0, "completions/min_terminated_length": 99.0, "epoch": 0.13653333333333334, "grad_norm": 0.02723911590874195, "learning_rate": 2.0000000000000003e-06, "loss": -0.0055, "num_tokens": 28106798.0, "reward": 1.0753717422485352, "reward_std": 0.26309192180633545, "rewards/accuracy_reward_step": 0.45703125, "rewards/final_brier_reward_step": 0.5436742901802063, "rewards/format_reward_step": 0.96484375, "step": 128 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 1.6262359635310641e-06, "aux_brier/mean_group_std": 0.07015446827119359, "aux_brier/mean_r": 0.9520226037816187, "aux_brier/n_active_tok": 253.125, "aux_brier/n_groups": 15.71875, "aux_brier/n_step_records": 63.28125, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.497678916827853, "calib/avg_num_step_conf": 7.984375, "calib/ece": 0.5247808764940238, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0002114764667956251, "calib/mean_conf": 0.03697211155378487, "calib/mu_c": 0.036879432624113466, "calib/mu_w": 0.03709090909090909, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.011237884982858392, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2672.0, "completions/max_terminated_length": 2672.0, "completions/mean_length": 460.859375, "completions/mean_terminated_length": 462.66668701171875, "completions/min_length": 0.0, "completions/min_terminated_length": 129.0, "epoch": 0.1376, "grad_norm": 0.0317411795258522, "learning_rate": 1.9722222222222224e-06, "loss": 0.0439, "num_tokens": 28327162.0, "reward": 1.1582276821136475, "reward_std": 0.1795579493045807, "rewards/accuracy_reward_step": 0.55078125, "rewards/final_brier_reward_step": 0.46884840726852417, "rewards/format_reward_step": 0.98046875, "step": 129 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 5.841466538480944e-06, "aux_brier/mean_group_std": 0.05399584990671969, "aux_brier/mean_r": 0.9670969175815727, "aux_brier/n_active_tok": 229.5, "aux_brier/n_groups": 12.8125, "aux_brier/n_step_records": 57.375, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5582240226337448, "calib/avg_num_step_conf": 7.37890625, "calib/ece": 0.5330555555555556, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0034722222222222238, "calib/mean_conf": 0.03837301587301588, "calib/mu_c": 0.03986111111111111, "calib/mu_w": 0.03638888888888889, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.01294569495580044, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2298.0, "completions/max_terminated_length": 2298.0, "completions/mean_length": 432.0078125, "completions/mean_terminated_length": 433.7019958496094, "completions/min_length": 0.0, "completions/min_terminated_length": 145.0, "epoch": 0.13866666666666666, "grad_norm": 0.05528915300965309, "learning_rate": 1.944444444444445e-06, "loss": 0.0211, "num_tokens": 28543044.0, "reward": 1.1688967943191528, "reward_std": 0.19122979044914246, "rewards/accuracy_reward_step": 0.5625, "rewards/final_brier_reward_step": 0.46464961767196655, "rewards/format_reward_step": 0.98046875, "step": 130 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 1.6664377451292012e-06, "aux_brier/mean_group_std": 0.04716095616572432, "aux_brier/mean_r": 0.9729032156268617, "aux_brier/n_active_tok": 228.5, "aux_brier/n_groups": 14.0625, "aux_brier/n_step_records": 57.125, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.632483660130719, "calib/avg_num_step_conf": 7.2734375, "calib/ece": 0.36559523809523814, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.005039215686274509, "calib/mean_conf": 0.04003968253968254, "calib/mu_c": 0.0430392156862745, "calib/mu_w": 0.03799999999999999, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0004365079365079365, "calib/std_conf": 0.01142602209220689, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2262.0, "completions/max_terminated_length": 2262.0, "completions/mean_length": 409.79296875, "completions/mean_terminated_length": 411.4000244140625, "completions/min_length": 0.0, "completions/min_terminated_length": 152.0, "epoch": 0.13973333333333332, "grad_norm": 0.009943638928234577, "learning_rate": 1.916666666666667e-06, "loss": 0.0278, "num_tokens": 28754159.0, "reward": 1.0452568531036377, "reward_std": 0.18828809261322021, "rewards/accuracy_reward_step": 0.3984375, "rewards/final_brier_reward_step": 0.6185277700424194, "rewards/format_reward_step": 0.984375, "step": 131 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 2.3497392188653876e-06, "aux_brier/mean_group_std": 0.10650228961541859, "aux_brier/mean_r": 0.9199625817435015, "aux_brier/n_active_tok": 246.75, "aux_brier/n_groups": 15.0625, "aux_brier/n_step_records": 61.6875, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5904807692307691, "calib/avg_num_step_conf": 7.71484375, "calib/ece": 0.5504330708661417, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.003293589743589738, "calib/mean_conf": 0.04011811023622047, "calib/mu_c": 0.04146666666666666, "calib/mu_w": 0.03817307692307692, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.012374799241656916, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3012.0, "completions/max_terminated_length": 3012.0, "completions/mean_length": 476.90234375, "completions/mean_terminated_length": 476.90234375, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 0.1408, "grad_norm": 0.007062196731567383, "learning_rate": 1.888888888888889e-06, "loss": 0.0843, "num_tokens": 28981838.0, "reward": 1.1953049898147583, "reward_std": 0.24086421728134155, "rewards/accuracy_reward_step": 0.5859375, "rewards/final_brier_reward_step": 0.4530948996543884, "rewards/format_reward_step": 0.9921875, "step": 132 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -4.0968403521857155e-06, "aux_brier/mean_group_std": 0.044143214320325724, "aux_brier/mean_r": 0.9686348367300469, "aux_brier/n_active_tok": 266.75, "aux_brier/n_groups": 15.96875, "aux_brier/n_step_records": 66.6875, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.5717361585782638, "calib/avg_num_step_conf": 8.484375, "calib/ece": 0.3443373493975903, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.004016064257028112, "calib/gap": -0.0015037593984962294, "calib/mean_conf": 0.045140562248995986, "calib/mu_c": 0.04421052631578948, "calib/mu_w": 0.04571428571428571, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.003975903614457831, "calib/std_conf": 0.06135092554760138, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2814.0, "completions/max_terminated_length": 2814.0, "completions/mean_length": 533.01171875, "completions/mean_terminated_length": 535.1019897460938, "completions/min_length": 0.0, "completions/min_terminated_length": 89.0, "epoch": 0.14186666666666667, "grad_norm": 0.047625601291656494, "learning_rate": 1.8611111111111113e-06, "loss": 0.0934, "num_tokens": 29224633.0, "reward": 1.0097026824951172, "reward_std": 0.29551905393600464, "rewards/accuracy_reward_step": 0.37109375, "rewards/final_brier_reward_step": 0.6247484087944031, "rewards/format_reward_step": 0.96484375, "step": 133 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -3.0788852103946773e-06, "aux_brier/mean_group_std": 0.05374224382897951, "aux_brier/mean_r": 0.9622196910151494, "aux_brier/n_active_tok": 247.75, "aux_brier/n_groups": 16.5, "aux_brier/n_step_records": 61.9375, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5610791826309068, "calib/avg_num_step_conf": 7.79296875, "calib/ece": 0.42318725099601595, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00256704980842911, "calib/mean_conf": 0.03896414342629481, "calib/mu_c": 0.04034482758620689, "calib/mu_w": 0.03777777777777778, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.012389861761354913, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2980.0, "completions/max_terminated_length": 2980.0, "completions/mean_length": 508.6328125, "completions/mean_terminated_length": 512.6378173828125, "completions/min_length": 0.0, "completions/min_terminated_length": 105.0, "epoch": 0.14293333333333333, "grad_norm": 0.05960475653409958, "learning_rate": 1.8333333333333333e-06, "loss": 0.0179, "num_tokens": 29463795.0, "reward": 1.0839262008666992, "reward_std": 0.2569059729576111, "rewards/accuracy_reward_step": 0.453125, "rewards/final_brier_reward_step": 0.5622671842575073, "rewards/format_reward_step": 0.98046875, "step": 134 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -5.640086092384422e-06, "aux_brier/mean_group_std": 0.03808248767728147, "aux_brier/mean_r": 0.9762806866309728, "aux_brier/n_active_tok": 242.75, "aux_brier/n_groups": 15.03125, "aux_brier/n_step_records": 60.6875, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.4719419924337957, "calib/avg_num_step_conf": 7.625, "calib/ece": 0.47575, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0008102143757881541, "calib/mean_conf": 0.040123015873015866, "calib/mu_c": 0.03973076923076922, "calib/mu_w": 0.040540983606557376, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.012658077681348367, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2734.0, "completions/max_terminated_length": 2734.0, "completions/mean_length": 508.1484375, "completions/mean_terminated_length": 508.1484375, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 0.144, "grad_norm": 0.00830479059368372, "learning_rate": 1.8055555555555557e-06, "loss": 0.0416, "num_tokens": 29699761.0, "reward": 1.1277220249176025, "reward_std": 0.23875916004180908, "rewards/accuracy_reward_step": 0.51171875, "rewards/final_brier_reward_step": 0.5108880400657654, "rewards/format_reward_step": 0.9765625, "step": 135 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -4.942148501108079e-06, "aux_brier/mean_group_std": 0.0384968094462189, "aux_brier/mean_r": 0.9768957754704042, "aux_brier/n_active_tok": 246.875, "aux_brier/n_groups": 14.25, "aux_brier/n_step_records": 61.71875, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5098223812213645, "calib/avg_num_step_conf": 7.73828125, "calib/ece": 0.41082283464566927, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.003937007874015748, "calib/gap": -0.010541831419067338, "calib/mean_conf": 0.04744488188976377, "calib/mu_c": 0.0415929203539823, "calib/mu_w": 0.05213475177304964, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0066929133858267716, "calib/std_conf": 0.07385564576126066, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2488.0, "completions/max_terminated_length": 2488.0, "completions/mean_length": 475.51953125, "completions/mean_terminated_length": 475.51953125, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 0.14506666666666668, "grad_norm": 0.007522990461438894, "learning_rate": 1.777777777777778e-06, "loss": 0.0359, "num_tokens": 29929982.0, "reward": 1.077108383178711, "reward_std": 0.22018541395664215, "rewards/accuracy_reward_step": 0.44140625, "rewards/final_brier_reward_step": 0.5740584135055542, "rewards/format_reward_step": 0.984375, "step": 136 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -1.3910668150707295e-07, "aux_brier/mean_group_std": 0.05392120868715749, "aux_brier/mean_r": 0.9598089070251012, "aux_brier/n_active_tok": 248.0, "aux_brier/n_groups": 17.46875, "aux_brier/n_step_records": 62.0, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5445488721804511, "calib/avg_num_step_conf": 7.76171875, "calib/ece": 0.4275810276679842, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.003952569169960474, "calib/gap": 0.010537343358395998, "calib/mean_conf": 0.04672727272727272, "calib/mu_c": 0.05226666666666667, "calib/mu_w": 0.04172932330827067, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.06051015600309179, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2810.0, "completions/max_terminated_length": 2810.0, "completions/mean_length": 452.89453125, "completions/mean_terminated_length": 454.6706237792969, "completions/min_length": 0.0, "completions/min_terminated_length": 157.0, "epoch": 0.14613333333333334, "grad_norm": 0.008079344406723976, "learning_rate": 1.75e-06, "loss": 0.0117, "num_tokens": 30152907.0, "reward": 1.1006499528884888, "reward_std": 0.2560088634490967, "rewards/accuracy_reward_step": 0.46875, "rewards/final_brier_reward_step": 0.5588501691818237, "rewards/format_reward_step": 0.984375, "step": 137 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 2.5199230675454487e-06, "aux_brier/mean_group_std": 0.041636905255926476, "aux_brier/mean_r": 0.9728434556229701, "aux_brier/n_active_tok": 242.0, "aux_brier/n_groups": 14.53125, "aux_brier/n_step_records": 60.5, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.531087577962578, "calib/avg_num_step_conf": 7.69140625, "calib/ece": 0.5457936507936508, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0008082120582120558, "calib/mean_conf": 0.042301587301587305, "calib/mu_c": 0.04263513513513513, "calib/mu_w": 0.041826923076923074, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0003968253968253968, "calib/std_conf": 0.010091855777607468, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2870.0, "completions/max_terminated_length": 2870.0, "completions/mean_length": 445.3828125, "completions/mean_terminated_length": 448.8897705078125, "completions/min_length": 0.0, "completions/min_terminated_length": 94.0, "epoch": 0.1472, "grad_norm": 0.016710827127099037, "learning_rate": 1.7222222222222224e-06, "loss": 0.0385, "num_tokens": 30371261.0, "reward": 1.1837337017059326, "reward_std": 0.2773284316062927, "rewards/accuracy_reward_step": 0.578125, "rewards/final_brier_reward_step": 0.45368513464927673, "rewards/format_reward_step": 0.984375, "step": 138 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -2.9383198082988393e-06, "aux_brier/mean_group_std": 0.05748919481822279, "aux_brier/mean_r": 0.9611986358883595, "aux_brier/n_active_tok": 214.375, "aux_brier/n_groups": 11.84375, "aux_brier/n_step_records": 53.59375, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5597033528605261, "calib/avg_num_step_conf": 6.81640625, "calib/ece": 0.5160823529411764, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.002291349869126269, "calib/mean_conf": 0.04078039215686275, "calib/mu_c": 0.04179577464788733, "calib/mu_w": 0.03950442477876106, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.009377842462847076, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1362.0, "completions/max_terminated_length": 1362.0, "completions/mean_length": 402.2421875, "completions/mean_terminated_length": 403.81964111328125, "completions/min_length": 0.0, "completions/min_terminated_length": 124.0, "epoch": 0.14826666666666666, "grad_norm": 0.013816448859870434, "learning_rate": 1.6944444444444446e-06, "loss": -0.0254, "num_tokens": 30577331.0, "reward": 1.1713128089904785, "reward_std": 0.2394649088382721, "rewards/accuracy_reward_step": 0.5546875, "rewards/final_brier_reward_step": 0.4821265935897827, "rewards/format_reward_step": 0.9921875, "step": 139 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -2.4136152465270566e-06, "aux_brier/mean_group_std": 0.048222059957680787, "aux_brier/mean_r": 0.9646903379686798, "aux_brier/n_active_tok": 226.125, "aux_brier/n_groups": 13.1875, "aux_brier/n_step_records": 56.53125, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5741913517194416, "calib/avg_num_step_conf": 7.20703125, "calib/ece": 0.6096102362204724, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.002519237316990122, "calib/mean_conf": 0.039996062992125984, "calib/mu_c": 0.040878787878787876, "calib/mu_w": 0.03835955056179775, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.00967442363600775, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1158.0, "completions/max_terminated_length": 1158.0, "completions/mean_length": 442.96875, "completions/mean_terminated_length": 444.7059020996094, "completions/min_length": 0.0, "completions/min_terminated_length": 139.0, "epoch": 0.14933333333333335, "grad_norm": 0.12129943072795868, "learning_rate": 1.6666666666666667e-06, "loss": 0.0224, "num_tokens": 30795747.0, "reward": 1.2441990375518799, "reward_std": 0.2039608657360077, "rewards/accuracy_reward_step": 0.6484375, "rewards/final_brier_reward_step": 0.3986715078353882, "rewards/format_reward_step": 0.9921875, "step": 140 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -3.430010737104361e-07, "aux_brier/mean_group_std": 0.05504474775615245, "aux_brier/mean_r": 0.9557840337264398, "aux_brier/n_active_tok": 222.5, "aux_brier/n_groups": 13.53125, "aux_brier/n_step_records": 55.625, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6478922125457529, "calib/avg_num_step_conf": 7.171875, "calib/ece": 0.5089486166007905, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.006007572889057178, "calib/mean_conf": 0.040458498023715414, "calib/mu_c": 0.04316546762589928, "calib/mu_w": 0.037157894736842105, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.011003559292867749, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2629.0, "completions/max_terminated_length": 2629.0, "completions/mean_length": 462.74609375, "completions/mean_terminated_length": 464.5608215332031, "completions/min_length": 0.0, "completions/min_terminated_length": 126.0, "epoch": 0.1504, "grad_norm": 0.018802516162395477, "learning_rate": 1.638888888888889e-06, "loss": 0.0292, "num_tokens": 31021306.0, "reward": 1.156792402267456, "reward_std": 0.2195369005203247, "rewards/accuracy_reward_step": 0.54296875, "rewards/final_brier_reward_step": 0.48654428124427795, "rewards/format_reward_step": 0.984375, "step": 141 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -3.2907357074840604e-06, "aux_brier/mean_group_std": 0.06699872421413317, "aux_brier/mean_r": 0.9522982126509846, "aux_brier/n_active_tok": 234.0, "aux_brier/n_groups": 12.875, "aux_brier/n_step_records": 58.5, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6051772962324552, "calib/avg_num_step_conf": 7.3125, "calib/ece": 0.472313725490196, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0038469588771238503, "calib/mean_conf": 0.04141176470588235, "calib/mu_c": 0.0432824427480916, "calib/mu_w": 0.03943548387096775, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.010231011048859815, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1366.0, "completions/max_terminated_length": 1366.0, "completions/mean_length": 443.5859375, "completions/mean_terminated_length": 445.3255310058594, "completions/min_length": 0.0, "completions/min_terminated_length": 138.0, "epoch": 0.15146666666666667, "grad_norm": 0.03870021551847458, "learning_rate": 1.6111111111111113e-06, "loss": -0.0109, "num_tokens": 31240024.0, "reward": 1.1394506692886353, "reward_std": 0.18925997614860535, "rewards/accuracy_reward_step": 0.51171875, "rewards/final_brier_reward_step": 0.5265531539916992, "rewards/format_reward_step": 0.9921875, "step": 142 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -1.100163481371208e-06, "aux_brier/mean_group_std": 0.06091011977021592, "aux_brier/mean_r": 0.9659451125676692, "aux_brier/n_active_tok": 231.5, "aux_brier/n_groups": 13.78125, "aux_brier/n_step_records": 57.875, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.4878180661577608, "calib/avg_num_step_conf": 7.2578125, "calib/ece": 0.4822231075697211, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.000595419847328249, "calib/mean_conf": 0.039689243027888454, "calib/mu_c": 0.03940458015267176, "calib/mu_w": 0.04000000000000001, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.0, "calib/std_conf": 0.010282418553020646, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2547.0, "completions/max_terminated_length": 2547.0, "completions/mean_length": 483.70703125, "completions/mean_terminated_length": 485.60394287109375, "completions/min_length": 0.0, "completions/min_terminated_length": 155.0, "epoch": 0.15253333333333333, "grad_norm": 0.00964946299791336, "learning_rate": 1.5833333333333333e-06, "loss": 0.0459, "num_tokens": 31471189.0, "reward": 1.118970274925232, "reward_std": 0.23116764426231384, "rewards/accuracy_reward_step": 0.51171875, "rewards/final_brier_reward_step": 0.49931907653808594, "rewards/format_reward_step": 0.96484375, "step": 143 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 1.7418115696443692e-06, "aux_brier/mean_group_std": 0.02911257219086772, "aux_brier/mean_r": 0.9812782295986435, "aux_brier/n_active_tok": 225.875, "aux_brier/n_groups": 13.0, "aux_brier/n_step_records": 56.46875, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.6049480194762469, "calib/avg_num_step_conf": 7.09375, "calib/ece": 0.5510916334661354, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.005094222924068963, "calib/mean_conf": 0.04253386454183267, "calib/mu_c": 0.044604026845637586, "calib/mu_w": 0.03950980392156862, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.013390319115745215, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2756.0, "completions/max_terminated_length": 2756.0, "completions/mean_length": 468.78515625, "completions/mean_terminated_length": 468.78515625, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 0.1536, "grad_norm": 0.0072034187614917755, "learning_rate": 1.5555555555555558e-06, "loss": 0.0611, "num_tokens": 31695326.0, "reward": 1.1843680143356323, "reward_std": 0.22442063689231873, "rewards/accuracy_reward_step": 0.58203125, "rewards/final_brier_reward_step": 0.4484097957611084, "rewards/format_reward_step": 0.98046875, "step": 144 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 2.663540706882017e-06, "aux_brier/mean_group_std": 0.0773822811106855, "aux_brier/mean_r": 0.9442634775455325, "aux_brier/n_active_tok": 232.875, "aux_brier/n_groups": 13.71875, "aux_brier/n_step_records": 58.21875, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5897197420634921, "calib/avg_num_step_conf": 7.31640625, "calib/ece": 0.5188671875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0032837301587301665, "calib/mean_conf": 0.04363281250000001, "calib/mu_c": 0.04506944444444445, "calib/mu_w": 0.04178571428571428, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.010883165823410198, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1474.0, "completions/max_terminated_length": 1474.0, "completions/mean_length": 425.484375, "completions/mean_terminated_length": 427.1529541015625, "completions/min_length": 0.0, "completions/min_terminated_length": 128.0, "epoch": 0.15466666666666667, "grad_norm": 0.00815511029213667, "learning_rate": 1.527777777777778e-06, "loss": 0.0387, "num_tokens": 31906954.0, "reward": 1.1840451955795288, "reward_std": 0.2767128646373749, "rewards/accuracy_reward_step": 0.5625, "rewards/final_brier_reward_step": 0.48618084192276, "rewards/format_reward_step": 1.0, "step": 145 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 5.1936076177483415e-06, "aux_brier/mean_group_std": 0.0266679472405245, "aux_brier/mean_r": 0.9837740362043433, "aux_brier/n_active_tok": 224.5, "aux_brier/n_groups": 12.34375, "aux_brier/n_step_records": 56.125, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6357101782633697, "calib/avg_num_step_conf": 7.3203125, "calib/ece": 0.39953492063492063, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.005112440099674133, "calib/mean_conf": 0.04094126984126984, "calib/mu_c": 0.04380180180180179, "calib/mu_w": 0.03868936170212766, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.011170996103439421, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1512.0, "completions/max_terminated_length": 1512.0, "completions/mean_length": 445.8671875, "completions/mean_terminated_length": 449.3779602050781, "completions/min_length": 0.0, "completions/min_terminated_length": 163.0, "epoch": 0.15573333333333333, "grad_norm": 0.16069373488426208, "learning_rate": 1.5e-06, "loss": 0.0081, "num_tokens": 32128312.0, "reward": 1.069602131843567, "reward_std": 0.2339491844177246, "rewards/accuracy_reward_step": 0.43359375, "rewards/final_brier_reward_step": 0.5830962657928467, "rewards/format_reward_step": 0.98046875, "step": 146 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -6.122242581813042e-06, "aux_brier/mean_group_std": 0.04190042669157262, "aux_brier/mean_r": 0.9669998645399763, "aux_brier/n_active_tok": 228.25, "aux_brier/n_groups": 12.3125, "aux_brier/n_step_records": 57.0625, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5327541827541827, "calib/avg_num_step_conf": 7.30859375, "calib/ece": 0.3733201581027668, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0006274131274131289, "calib/mean_conf": 0.042490118577075096, "calib/mu_c": 0.04285714285714286, "calib/mu_w": 0.04222972972972973, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0003952569169960474, "calib/std_conf": 0.010693580745283591, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 1305.0, "completions/max_terminated_length": 1305.0, "completions/mean_length": 433.19140625, "completions/mean_terminated_length": 438.3280944824219, "completions/min_length": 0.0, "completions/min_terminated_length": 137.0, "epoch": 0.1568, "grad_norm": 0.0881151482462883, "learning_rate": 1.4722222222222225e-06, "loss": -0.0246, "num_tokens": 32342889.0, "reward": 1.0542166233062744, "reward_std": 0.187119722366333, "rewards/accuracy_reward_step": 0.41015625, "rewards/final_brier_reward_step": 0.6074917912483215, "rewards/format_reward_step": 0.984375, "step": 147 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -5.676872145587986e-06, "aux_brier/mean_group_std": 0.05869642259014696, "aux_brier/mean_r": 0.9598940281989762, "aux_brier/n_active_tok": 231.125, "aux_brier/n_groups": 13.71875, "aux_brier/n_step_records": 57.78125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6284667713239142, "calib/avg_num_step_conf": 7.22265625, "calib/ece": 0.5698425196850394, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.004559131344845639, "calib/mean_conf": 0.04433070866141732, "calib/mu_c": 0.04608974358974359, "calib/mu_w": 0.04153061224489795, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.011977048338358404, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2889.0, "completions/max_terminated_length": 2889.0, "completions/mean_length": 441.85546875, "completions/mean_terminated_length": 441.85546875, "completions/min_length": 88.0, "completions/min_terminated_length": 88.0, "epoch": 0.15786666666666666, "grad_norm": 0.008789680898189545, "learning_rate": 1.4444444444444445e-06, "loss": 0.06, "num_tokens": 32561116.0, "reward": 1.2146917581558228, "reward_std": 0.2064843475818634, "rewards/accuracy_reward_step": 0.609375, "rewards/final_brier_reward_step": 0.43689218163490295, "rewards/format_reward_step": 0.9921875, "step": 148 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 3.879356427954894e-06, "aux_brier/mean_group_std": 0.060673587415830385, "aux_brier/mean_r": 0.9645449808870379, "aux_brier/n_active_tok": 254.625, "aux_brier/n_groups": 14.8125, "aux_brier/n_step_records": 63.65625, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.54015503875969, "calib/avg_num_step_conf": 8.06640625, "calib/ece": 0.447988188976378, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.003981705426356605, "calib/mean_conf": 0.04413779527559055, "calib/mu_c": 0.04616000000000001, "calib/mu_w": 0.0421782945736434, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.024315201910699562, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2418.0, "completions/max_terminated_length": 2418.0, "completions/mean_length": 493.984375, "completions/mean_terminated_length": 493.984375, "completions/min_length": 102.0, "completions/min_terminated_length": 102.0, "epoch": 0.15893333333333334, "grad_norm": 0.0068074800074100494, "learning_rate": 1.4166666666666667e-06, "loss": 0.0289, "num_tokens": 32792032.0, "reward": 1.1180649995803833, "reward_std": 0.2500190734863281, "rewards/accuracy_reward_step": 0.48828125, "rewards/final_brier_reward_step": 0.5425726771354675, "rewards/format_reward_step": 0.98828125, "step": 149 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -5.052330016186435e-06, "aux_brier/mean_group_std": 0.05836092531492694, "aux_brier/mean_r": 0.9614388214260573, "aux_brier/n_active_tok": 224.375, "aux_brier/n_groups": 14.21875, "aux_brier/n_step_records": 56.09375, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5908130601792574, "calib/avg_num_step_conf": 7.01171875, "calib/ece": 0.5207142857142857, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 3.329065300895773e-05, "calib/mean_conf": 0.04674603174603175, "calib/mu_c": 0.04676056338028168, "calib/mu_w": 0.04672727272727272, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.001984126984126984, "calib/std_conf": 0.030869139186155324, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2596.0, "completions/max_terminated_length": 2596.0, "completions/mean_length": 432.80859375, "completions/mean_terminated_length": 432.80859375, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 0.16, "grad_norm": 0.007508590817451477, "learning_rate": 1.3888888888888892e-06, "loss": 0.0385, "num_tokens": 33007791.0, "reward": 1.1638078689575195, "reward_std": 0.21474945545196533, "rewards/accuracy_reward_step": 0.5546875, "rewards/final_brier_reward_step": 0.475543737411499, "rewards/format_reward_step": 0.98046875, "step": 150 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -2.030056679203085e-06, "aux_brier/mean_group_std": 0.05431678431607821, "aux_brier/mean_r": 0.9656270469246786, "aux_brier/n_active_tok": 235.75, "aux_brier/n_groups": 15.125, "aux_brier/n_step_records": 58.9375, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.6165965046562062, "calib/avg_num_step_conf": 7.70703125, "calib/ece": 0.42398406374501985, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.00398406374501992, "calib/gap": -0.0010352085725219909, "calib/mean_conf": 0.0500398406374502, "calib/mu_c": 0.0494871794871795, "calib/mu_w": 0.05052238805970149, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.003944223107569721, "calib/std_conf": 0.06177002720842194, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2549.0, "completions/max_terminated_length": 2549.0, "completions/mean_length": 454.0703125, "completions/mean_terminated_length": 459.4545593261719, "completions/min_length": 0.0, "completions/min_terminated_length": 150.0, "epoch": 0.16106666666666666, "grad_norm": 0.18628118932247162, "learning_rate": 1.3611111111111112e-06, "loss": 0.0335, "num_tokens": 33231057.0, "reward": 1.0839394330978394, "reward_std": 0.23412418365478516, "rewards/accuracy_reward_step": 0.45703125, "rewards/final_brier_reward_step": 0.5623202919960022, "rewards/format_reward_step": 0.97265625, "step": 151 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 1.4813556452608623e-06, "aux_brier/mean_group_std": 0.024609692455337367, "aux_brier/mean_r": 0.9845774417567617, "aux_brier/n_active_tok": 232.375, "aux_brier/n_groups": 14.53125, "aux_brier/n_step_records": 58.09375, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5571491754596576, "calib/avg_num_step_conf": 7.4296875, "calib/ece": 0.4276984126984127, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0010260946483856767, "calib/mean_conf": 0.04634920634920635, "calib/mu_c": 0.046890756302521014, "calib/mu_w": 0.04586466165413534, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0009126984126984126, "calib/std_conf": 0.012286473049864192, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2503.0, "completions/max_terminated_length": 2503.0, "completions/mean_length": 440.078125, "completions/mean_terminated_length": 443.5433044433594, "completions/min_length": 0.0, "completions/min_terminated_length": 128.0, "epoch": 0.16213333333333332, "grad_norm": 0.06021711230278015, "learning_rate": 1.3333333333333334e-06, "loss": 0.0013, "num_tokens": 33449109.0, "reward": 1.0943185091018677, "reward_std": 0.253381609916687, "rewards/accuracy_reward_step": 0.46484375, "rewards/final_brier_reward_step": 0.556961715221405, "rewards/format_reward_step": 0.98046875, "step": 152 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 9.954473118162221e-07, "aux_brier/mean_group_std": 0.07337225726323636, "aux_brier/mean_r": 0.9445023351861707, "aux_brier/n_active_tok": 233.0, "aux_brier/n_groups": 14.96875, "aux_brier/n_step_records": 58.25, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5616624525916561, "calib/avg_num_step_conf": 7.39453125, "calib/ece": 0.5073517786561265, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0011826801517066943, "calib/mean_conf": 0.04711462450592886, "calib/mu_c": 0.04764285714285714, "calib/mu_w": 0.046460176991150445, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0005533596837944664, "calib/std_conf": 0.011557328474167147, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3051.0, "completions/max_terminated_length": 3051.0, "completions/mean_length": 458.6484375, "completions/mean_terminated_length": 460.44708251953125, "completions/min_length": 0.0, "completions/min_terminated_length": 150.0, "epoch": 0.1632, "grad_norm": 0.14926986396312714, "learning_rate": 1.3055555555555556e-06, "loss": 0.0176, "num_tokens": 33673843.0, "reward": 1.1638129949569702, "reward_std": 0.24860437214374542, "rewards/accuracy_reward_step": 0.546875, "rewards/final_brier_reward_step": 0.4911898374557495, "rewards/format_reward_step": 0.98828125, "step": 153 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 5.602123753334354e-06, "aux_brier/mean_group_std": 0.0533491072908468, "aux_brier/mean_r": 0.9626451918972292, "aux_brier/n_active_tok": 216.375, "aux_brier/n_groups": 12.21875, "aux_brier/n_step_records": 54.09375, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5833640383198232, "calib/avg_num_step_conf": 6.7890625, "calib/ece": 0.41314453125, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.001814664701547529, "calib/mean_conf": 0.04779296875000001, "calib/mu_c": 0.04877118644067796, "calib/mu_w": 0.04695652173913043, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.013546509488112554, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 930.0, "completions/max_terminated_length": 930.0, "completions/mean_length": 410.5625, "completions/mean_terminated_length": 412.1725769042969, "completions/min_length": 0.0, "completions/min_terminated_length": 128.0, "epoch": 0.16426666666666667, "grad_norm": 0.007773758377879858, "learning_rate": 1.2777777777777779e-06, "loss": 0.017, "num_tokens": 33883387.0, "reward": 1.1004674434661865, "reward_std": 0.20809802412986755, "rewards/accuracy_reward_step": 0.4609375, "rewards/final_brier_reward_step": 0.5737451910972595, "rewards/format_reward_step": 0.9921875, "step": 154 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -6.664061979189118e-07, "aux_brier/mean_group_std": 0.06166461096641661, "aux_brier/mean_r": 0.9576725326485513, "aux_brier/n_active_tok": 214.0, "aux_brier/n_groups": 12.125, "aux_brier/n_step_records": 53.5, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5149309443822323, "calib/avg_num_step_conf": 6.703125, "calib/ece": 0.4006156862745098, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0006366181410974378, "calib/mean_conf": 0.04730588235294117, "calib/mu_c": 0.047657894736842114, "calib/mu_w": 0.04702127659574468, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.00043137254901960784, "calib/std_conf": 0.01128562180456048, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1221.0, "completions/max_terminated_length": 1221.0, "completions/mean_length": 390.046875, "completions/mean_terminated_length": 391.5765075683594, "completions/min_length": 0.0, "completions/min_terminated_length": 149.0, "epoch": 0.16533333333333333, "grad_norm": 0.009233204647898674, "learning_rate": 1.25e-06, "loss": -0.0112, "num_tokens": 34090455.0, "reward": 1.0910768508911133, "reward_std": 0.21764743328094482, "rewards/accuracy_reward_step": 0.4453125, "rewards/final_brier_reward_step": 0.5908705592155457, "rewards/format_reward_step": 0.99609375, "step": 155 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -4.325620095946281e-06, "aux_brier/mean_group_std": 0.054480400654208366, "aux_brier/mean_r": 0.9674517185710153, "aux_brier/n_active_tok": 243.875, "aux_brier/n_groups": 14.5, "aux_brier/n_step_records": 60.96875, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.539719329438659, "calib/avg_num_step_conf": 7.68359375, "calib/ece": 0.4561354581673307, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.000800736601473187, "calib/mean_conf": 0.04984063745019921, "calib/mu_c": 0.05023622047244094, "calib/mu_w": 0.04943548387096775, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.012717216347358796, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2768.0, "completions/max_terminated_length": 2768.0, "completions/mean_length": 465.54296875, "completions/mean_terminated_length": 467.36865234375, "completions/min_length": 0.0, "completions/min_terminated_length": 138.0, "epoch": 0.1664, "grad_norm": 0.036862220615148544, "learning_rate": 1.2222222222222223e-06, "loss": 0.0194, "num_tokens": 34314394.0, "reward": 1.1163079738616943, "reward_std": 0.23989202082157135, "rewards/accuracy_reward_step": 0.49609375, "rewards/final_brier_reward_step": 0.5277324318885803, "rewards/format_reward_step": 0.9765625, "step": 156 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 6.117947797312784e-06, "aux_brier/mean_group_std": 0.0758693357565563, "aux_brier/mean_r": 0.953219320369304, "aux_brier/n_active_tok": 241.25, "aux_brier/n_groups": 14.40625, "aux_brier/n_step_records": 60.3125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5507772680937819, "calib/avg_num_step_conf": 7.7734375, "calib/ece": 0.5174031620553359, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0019758537206931784, "calib/mean_conf": 0.05176679841897234, "calib/mu_c": 0.05261805555555556, "calib/mu_w": 0.05064220183486238, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.016348386088374075, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2498.0, "completions/max_terminated_length": 2498.0, "completions/mean_length": 427.2734375, "completions/mean_terminated_length": 428.94903564453125, "completions/min_length": 0.0, "completions/min_terminated_length": 122.0, "epoch": 0.16746666666666668, "grad_norm": 0.01833532564342022, "learning_rate": 1.1944444444444446e-06, "loss": 0.0152, "num_tokens": 34527504.0, "reward": 1.1730599403381348, "reward_std": 0.275108277797699, "rewards/accuracy_reward_step": 0.5625, "rewards/final_brier_reward_step": 0.48130229115486145, "rewards/format_reward_step": 0.98046875, "step": 157 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -5.509500303924053e-06, "aux_brier/mean_group_std": 0.05750336788565342, "aux_brier/mean_r": 0.9604100471300262, "aux_brier/n_active_tok": 247.375, "aux_brier/n_groups": 16.125, "aux_brier/n_step_records": 61.84375, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4988127326402259, "calib/avg_num_step_conf": 7.73046875, "calib/ece": 0.5304347826086955, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00037094082916183657, "calib/mean_conf": 0.05059288537549407, "calib/mu_c": 0.05074829931972789, "calib/mu_w": 0.05037735849056605, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.012512612955692191, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3052.0, "completions/max_terminated_length": 3052.0, "completions/mean_length": 475.828125, "completions/mean_terminated_length": 475.828125, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 0.16853333333333334, "grad_norm": 0.008424046449363232, "learning_rate": 1.1666666666666668e-06, "loss": 0.1205, "num_tokens": 34754556.0, "reward": 1.1857742071151733, "reward_std": 0.2292053997516632, "rewards/accuracy_reward_step": 0.57421875, "rewards/final_brier_reward_step": 0.46965938806533813, "rewards/format_reward_step": 0.98828125, "step": 158 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 5.003362758770891e-06, "aux_brier/mean_group_std": 0.03255680191022221, "aux_brier/mean_r": 0.9800528170120831, "aux_brier/n_active_tok": 215.0, "aux_brier/n_groups": 11.78125, "aux_brier/n_step_records": 53.75, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5679563492063492, "calib/avg_num_step_conf": 7.04296875, "calib/ece": 0.4506889763779528, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0008302331349206368, "calib/mean_conf": 0.049783464566929135, "calib/mu_c": 0.04936507936507937, "calib/mu_w": 0.050195312500000006, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.002204724409448819, "calib/std_conf": 0.0193701095480198, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1813.0, "completions/max_terminated_length": 1813.0, "completions/mean_length": 407.72265625, "completions/mean_terminated_length": 409.32159423828125, "completions/min_length": 0.0, "completions/min_terminated_length": 114.0, "epoch": 0.1696, "grad_norm": 0.1121128648519516, "learning_rate": 1.138888888888889e-06, "loss": -0.012, "num_tokens": 34963717.0, "reward": 1.1217937469482422, "reward_std": 0.2411247342824936, "rewards/accuracy_reward_step": 0.4921875, "rewards/final_brier_reward_step": 0.541862428188324, "rewards/format_reward_step": 0.98828125, "step": 159 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 7.517531570189195e-06, "aux_brier/mean_group_std": 0.03846520621942318, "aux_brier/mean_r": 0.9789132501042819, "aux_brier/n_active_tok": 238.125, "aux_brier/n_groups": 15.40625, "aux_brier/n_step_records": 59.53125, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.6025040650406505, "calib/avg_num_step_conf": 7.9921875, "calib/ece": 0.4452016129032258, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.004286829268292679, "calib/mean_conf": 0.05076612903225808, "calib/mu_c": 0.052926829268292695, "calib/mu_w": 0.04864000000000002, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.013992284089735556, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2795.0, "completions/max_terminated_length": 2795.0, "completions/mean_length": 416.046875, "completions/mean_terminated_length": 422.65081787109375, "completions/min_length": 0.0, "completions/min_terminated_length": 144.0, "epoch": 0.17066666666666666, "grad_norm": 0.25963467359542847, "learning_rate": 1.111111111111111e-06, "loss": 0.0256, "num_tokens": 35175065.0, "reward": 1.0989573001861572, "reward_std": 0.26471462845802307, "rewards/accuracy_reward_step": 0.48046875, "rewards/final_brier_reward_step": 0.5364543199539185, "rewards/format_reward_step": 0.96875, "step": 160 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -2.7469933693291537e-06, "aux_brier/mean_group_std": 0.07084469799028036, "aux_brier/mean_r": 0.9581898140432695, "aux_brier/n_active_tok": 203.125, "aux_brier/n_groups": 11.1875, "aux_brier/n_step_records": 50.78125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.557933122333838, "calib/avg_num_step_conf": 6.3515625, "calib/ece": 0.6153333333333333, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.00392156862745098, "calib/gap": -0.007453557176276326, "calib/mean_conf": 0.05517647058823529, "calib/mu_c": 0.05266272189349112, "calib/mu_w": 0.06011627906976744, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0038823529411764705, "calib/std_conf": 0.06054546367695088, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2293.0, "completions/max_terminated_length": 2293.0, "completions/mean_length": 397.84765625, "completions/mean_terminated_length": 397.84765625, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 0.17173333333333332, "grad_norm": 0.008866028860211372, "learning_rate": 1.0833333333333335e-06, "loss": 0.0211, "num_tokens": 35380834.0, "reward": 1.2528870105743408, "reward_std": 0.21473273634910583, "rewards/accuracy_reward_step": 0.66015625, "rewards/final_brier_reward_step": 0.3943605422973633, "rewards/format_reward_step": 0.98828125, "step": 161 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 2.1794646693495956e-06, "aux_brier/mean_group_std": 0.05350575704744897, "aux_brier/mean_r": 0.9654285859149274, "aux_brier/n_active_tok": 220.0, "aux_brier/n_groups": 12.40625, "aux_brier/n_step_records": 55.0, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5192154520111508, "calib/avg_num_step_conf": 7.015625, "calib/ece": 0.5847058823529412, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0005874153723616216, "calib/mean_conf": 0.050588235294117656, "calib/mu_c": 0.05080246913580247, "calib/mu_w": 0.05021505376344085, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.009943322659617565, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1134.0, "completions/max_terminated_length": 1134.0, "completions/mean_length": 404.54296875, "completions/mean_terminated_length": 406.1294250488281, "completions/min_length": 0.0, "completions/min_terminated_length": 125.0, "epoch": 0.1728, "grad_norm": 0.02492787502706051, "learning_rate": 1.0555555555555557e-06, "loss": -0.0009, "num_tokens": 35588541.0, "reward": 1.2370920181274414, "reward_std": 0.23188012838363647, "rewards/accuracy_reward_step": 0.6328125, "rewards/final_brier_reward_step": 0.4249304533004761, "rewards/format_reward_step": 0.99609375, "step": 162 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -7.77822889486135e-07, "aux_brier/mean_group_std": 0.052049492159757335, "aux_brier/mean_r": 0.9677971015727546, "aux_brier/n_active_tok": 225.125, "aux_brier/n_groups": 13.03125, "aux_brier/n_step_records": 56.28125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6053039702233252, "calib/avg_num_step_conf": 7.04296875, "calib/ece": 0.4588582677165355, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0048238213399503654, "calib/mean_conf": 0.05295275590551181, "calib/mu_c": 0.0553076923076923, "calib/mu_w": 0.050483870967741935, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.01644430268604241, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2672.0, "completions/max_terminated_length": 2672.0, "completions/mean_length": 443.4921875, "completions/mean_terminated_length": 443.4921875, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 0.17386666666666667, "grad_norm": 0.008522797375917435, "learning_rate": 1.0277777777777777e-06, "loss": 0.0339, "num_tokens": 35806907.0, "reward": 1.1382803916931152, "reward_std": 0.20851092040538788, "rewards/accuracy_reward_step": 0.5078125, "rewards/final_brier_reward_step": 0.5374965071678162, "rewards/format_reward_step": 0.9921875, "step": 163 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 5.820982779625172e-06, "aux_brier/mean_group_std": 0.03242034688768444, "aux_brier/mean_r": 0.9802378489130346, "aux_brier/n_active_tok": 240.375, "aux_brier/n_groups": 15.84375, "aux_brier/n_step_records": 60.09375, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.582537560478737, "calib/avg_num_step_conf": 7.74609375, "calib/ece": 0.47485258964143423, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0027198879551820695, "calib/mean_conf": 0.05104382470119522, "calib/mu_c": 0.05233333333333333, "calib/mu_w": 0.04961344537815126, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.012386620131417099, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2792.0, "completions/max_terminated_length": 2792.0, "completions/mean_length": 466.140625, "completions/mean_terminated_length": 467.9686584472656, "completions/min_length": 0.0, "completions/min_terminated_length": 124.0, "epoch": 0.17493333333333333, "grad_norm": 0.015697116032242775, "learning_rate": 1.0000000000000002e-06, "loss": 0.0351, "num_tokens": 36032375.0, "reward": 1.1348862648010254, "reward_std": 0.26142996549606323, "rewards/accuracy_reward_step": 0.515625, "rewards/final_brier_reward_step": 0.516107439994812, "rewards/format_reward_step": 0.98046875, "step": 164 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 3.5277262844246593e-07, "aux_brier/mean_group_std": 0.050297317082726646, "aux_brier/mean_r": 0.9672013091528888, "aux_brier/n_active_tok": 243.625, "aux_brier/n_groups": 14.53125, "aux_brier/n_step_records": 60.90625, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.61565260669404, "calib/avg_num_step_conf": 7.67578125, "calib/ece": 0.3954117647058823, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.00784313725490196, "calib/gap": 0.006390444195595382, "calib/mean_conf": 0.05909803921568627, "calib/mu_c": 0.06263157894736843, "calib/mu_w": 0.056241134751773045, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.003725490196078431, "calib/std_conf": 0.08267803248129973, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1356.0, "completions/max_terminated_length": 1356.0, "completions/mean_length": 438.484375, "completions/mean_terminated_length": 440.2039489746094, "completions/min_length": 0.0, "completions/min_terminated_length": 125.0, "epoch": 0.176, "grad_norm": 0.047477055341005325, "learning_rate": 9.722222222222224e-07, "loss": 0.0018, "num_tokens": 36250203.0, "reward": 1.0924279689788818, "reward_std": 0.187387153506279, "rewards/accuracy_reward_step": 0.4453125, "rewards/final_brier_reward_step": 0.5962746143341064, "rewards/format_reward_step": 0.99609375, "step": 165 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 5.171900063494217e-07, "aux_brier/mean_group_std": 0.06750884783215277, "aux_brier/mean_r": 0.9568796035838543, "aux_brier/n_active_tok": 246.625, "aux_brier/n_groups": 14.75, "aux_brier/n_step_records": 61.65625, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5955882352941176, "calib/avg_num_step_conf": 7.70703125, "calib/ece": 0.5528853754940711, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00303267973856209, "calib/mean_conf": 0.053833992094861664, "calib/mu_c": 0.055032679738562094, "calib/mu_w": 0.052000000000000005, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0009881422924901185, "calib/std_conf": 0.01852047554729525, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2983.0, "completions/max_terminated_length": 2983.0, "completions/mean_length": 486.08203125, "completions/mean_terminated_length": 486.08203125, "completions/min_length": 179.0, "completions/min_terminated_length": 179.0, "epoch": 0.17706666666666668, "grad_norm": 0.009201133623719215, "learning_rate": 9.444444444444445e-07, "loss": 0.0739, "num_tokens": 36480824.0, "reward": 1.20216965675354, "reward_std": 0.2176104187965393, "rewards/accuracy_reward_step": 0.59765625, "rewards/final_brier_reward_step": 0.44930312037467957, "rewards/format_reward_step": 0.984375, "step": 166 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -4.838929182349183e-08, "aux_brier/mean_group_std": 0.06980713138458353, "aux_brier/mean_r": 0.9489904823940546, "aux_brier/n_active_tok": 220.0, "aux_brier/n_groups": 13.375, "aux_brier/n_step_records": 55.0, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5848094471282876, "calib/avg_num_step_conf": 6.875, "calib/ece": 0.5824409448818898, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.005668276972624793, "calib/mean_conf": 0.05535433070866142, "calib/mu_c": 0.0574074074074074, "calib/mu_w": 0.05173913043478261, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.01705919640640684, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1325.0, "completions/max_terminated_length": 1325.0, "completions/mean_length": 422.1796875, "completions/mean_terminated_length": 423.8353271484375, "completions/min_length": 0.0, "completions/min_terminated_length": 135.0, "epoch": 0.17813333333333334, "grad_norm": 0.0106651084497571, "learning_rate": 9.166666666666666e-07, "loss": -0.0091, "num_tokens": 36694510.0, "reward": 1.236081838607788, "reward_std": 0.18263085186481476, "rewards/accuracy_reward_step": 0.6328125, "rewards/final_brier_reward_step": 0.42870235443115234, "rewards/format_reward_step": 0.9921875, "step": 167 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -1.2900859430176048e-06, "aux_brier/mean_group_std": 0.07416423545452544, "aux_brier/mean_r": 0.9448001503083485, "aux_brier/n_active_tok": 240.25, "aux_brier/n_groups": 14.03125, "aux_brier/n_step_records": 60.0625, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4915302626570231, "calib/avg_num_step_conf": 7.59765625, "calib/ece": 0.5072964426877471, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0011799264052785169, "calib/mean_conf": 0.05396837944664032, "calib/mu_c": 0.053450704225352114, "calib/mu_w": 0.05463063063063063, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.01717058646832708, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2539.0, "completions/max_terminated_length": 2539.0, "completions/mean_length": 491.74609375, "completions/mean_terminated_length": 493.6745300292969, "completions/min_length": 0.0, "completions/min_terminated_length": 147.0, "epoch": 0.1792, "grad_norm": 0.09920989722013474, "learning_rate": 8.88888888888889e-07, "loss": 0.0508, "num_tokens": 36925069.0, "reward": 1.1712583303451538, "reward_std": 0.26346373558044434, "rewards/accuracy_reward_step": 0.5546875, "rewards/final_brier_reward_step": 0.48972082138061523, "rewards/format_reward_step": 0.98828125, "step": 168 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 9.291881220008236e-07, "aux_brier/mean_group_std": 0.046320013962909065, "aux_brier/mean_r": 0.9691474285849738, "aux_brier/n_active_tok": 222.375, "aux_brier/n_groups": 12.90625, "aux_brier/n_step_records": 55.59375, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5444451317580107, "calib/avg_num_step_conf": 6.9609375, "calib/ece": 0.48094117647058826, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.002918470864777936, "calib/mean_conf": 0.05631372549019607, "calib/mu_c": 0.05766423357664234, "calib/mu_w": 0.054745762711864404, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.017727225318511404, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1188.0, "completions/max_terminated_length": 1188.0, "completions/mean_length": 428.91796875, "completions/mean_terminated_length": 430.60003662109375, "completions/min_length": 0.0, "completions/min_terminated_length": 128.0, "epoch": 0.18026666666666666, "grad_norm": 0.03275342658162117, "learning_rate": 8.611111111111112e-07, "loss": 0.0182, "num_tokens": 37139056.0, "reward": 1.1580235958099365, "reward_std": 0.22460977733135223, "rewards/accuracy_reward_step": 0.53515625, "rewards/final_brier_reward_step": 0.5149070024490356, "rewards/format_reward_step": 0.98828125, "step": 169 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -3.7450650141740427e-06, "aux_brier/mean_group_std": 0.047810629905576524, "aux_brier/mean_r": 0.9700219712996427, "aux_brier/n_active_tok": 250.125, "aux_brier/n_groups": 13.96875, "aux_brier/n_step_records": 62.53125, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5746103896103896, "calib/avg_num_step_conf": 7.94140625, "calib/ece": 0.50284, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0033701298701298496, "calib/mean_conf": 0.05716000000000001, "calib/mu_c": 0.05864285714285712, "calib/mu_w": 0.05527272727272727, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.017399264352265013, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1858.0, "completions/max_terminated_length": 1858.0, "completions/mean_length": 453.69921875, "completions/mean_terminated_length": 457.2716369628906, "completions/min_length": 0.0, "completions/min_terminated_length": 165.0, "epoch": 0.18133333333333335, "grad_norm": 0.03828566521406174, "learning_rate": 8.333333333333333e-07, "loss": -0.0102, "num_tokens": 37359355.0, "reward": 1.157741665840149, "reward_std": 0.20567163825035095, "rewards/accuracy_reward_step": 0.546875, "rewards/final_brier_reward_step": 0.49034178256988525, "rewards/format_reward_step": 0.9765625, "step": 170 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -7.616850770109096e-07, "aux_brier/mean_group_std": 0.06064033757296603, "aux_brier/mean_r": 0.9528602629857266, "aux_brier/n_active_tok": 229.625, "aux_brier/n_groups": 14.75, "aux_brier/n_step_records": 57.40625, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5076825396825397, "calib/avg_num_step_conf": 7.1875, "calib/ece": 0.44635059760956175, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.00398406374501992, "calib/gap": -0.007371428571428598, "calib/mean_conf": 0.0609003984063745, "calib/mu_c": 0.0572, "calib/mu_w": 0.0645714285714286, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0046215139442231075, "calib/std_conf": 0.06310405338507291, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3020.0, "completions/max_terminated_length": 3020.0, "completions/mean_length": 434.984375, "completions/mean_terminated_length": 436.6902160644531, "completions/min_length": 0.0, "completions/min_terminated_length": 99.0, "epoch": 0.1824, "grad_norm": 0.02412530779838562, "learning_rate": 8.055555555555557e-07, "loss": 0.0243, "num_tokens": 37577607.0, "reward": 1.1136420965194702, "reward_std": 0.26265227794647217, "rewards/accuracy_reward_step": 0.48828125, "rewards/final_brier_reward_step": 0.54050612449646, "rewards/format_reward_step": 0.98046875, "step": 171 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -1.4629824352796472e-06, "aux_brier/mean_group_std": 0.050449672669728565, "aux_brier/mean_r": 0.972808238036711, "aux_brier/n_active_tok": 214.375, "aux_brier/n_groups": 12.125, "aux_brier/n_step_records": 53.59375, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5078609986504723, "calib/avg_num_step_conf": 7.1171875, "calib/ece": 0.5616334661354581, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.002009446693657234, "calib/mean_conf": 0.05988047808764941, "calib/mu_c": 0.060641025641025637, "calib/mu_w": 0.0586315789473684, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.018840535931701704, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2404.0, "completions/max_terminated_length": 2404.0, "completions/mean_length": 401.13671875, "completions/mean_terminated_length": 407.5039978027344, "completions/min_length": 0.0, "completions/min_terminated_length": 123.0, "epoch": 0.18346666666666667, "grad_norm": 0.4477880299091339, "learning_rate": 7.777777777777779e-07, "loss": -0.0563, "num_tokens": 37783650.0, "reward": 1.2098934650421143, "reward_std": 0.2365962117910385, "rewards/accuracy_reward_step": 0.609375, "rewards/final_brier_reward_step": 0.44113630056381226, "rewards/format_reward_step": 0.98046875, "step": 172 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -4.0819355003052227e-07, "aux_brier/mean_group_std": 0.06625295315938617, "aux_brier/mean_r": 0.9609529933912402, "aux_brier/n_active_tok": 236.5, "aux_brier/n_groups": 14.625, "aux_brier/n_step_records": 59.125, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5375490196078432, "calib/avg_num_step_conf": 7.70703125, "calib/ece": 0.5448221343873517, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0020189542483660325, "calib/mean_conf": 0.05992094861660079, "calib/mu_c": 0.06071895424836603, "calib/mu_w": 0.058699999999999995, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.019048349027701958, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1312.0, "completions/max_terminated_length": 1312.0, "completions/mean_length": 430.2421875, "completions/mean_terminated_length": 433.6299133300781, "completions/min_length": 0.0, "completions/min_terminated_length": 96.0, "epoch": 0.18453333333333333, "grad_norm": 0.014841507188975811, "learning_rate": 7.5e-07, "loss": -0.0398, "num_tokens": 37996952.0, "reward": 1.2026280164718628, "reward_std": 0.2117912322282791, "rewards/accuracy_reward_step": 0.6015625, "rewards/final_brier_reward_step": 0.4511374831199646, "rewards/format_reward_step": 0.9765625, "step": 173 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -5.120774879019407e-06, "aux_brier/mean_group_std": 0.061135886942515615, "aux_brier/mean_r": 0.9600173147391435, "aux_brier/n_active_tok": 238.5, "aux_brier/n_groups": 14.625, "aux_brier/n_step_records": 59.625, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.535721257075932, "calib/avg_num_step_conf": 7.6328125, "calib/ece": 0.37390000000000007, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.004, "calib/gap": 0.01181111327997919, "calib/mean_conf": 0.06210000000000001, "calib/mu_c": 0.06876146788990826, "calib/mu_w": 0.056950354609929074, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.062023302072688774, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2635.0, "completions/max_terminated_length": 2635.0, "completions/mean_length": 499.42578125, "completions/mean_terminated_length": 501.38433837890625, "completions/min_length": 0.0, "completions/min_terminated_length": 190.0, "epoch": 0.1856, "grad_norm": 0.07152820378541946, "learning_rate": 7.222222222222222e-07, "loss": 0.0406, "num_tokens": 38229037.0, "reward": 1.0576062202453613, "reward_std": 0.26821964979171753, "rewards/accuracy_reward_step": 0.42578125, "rewards/final_brier_reward_step": 0.5976123809814453, "rewards/format_reward_step": 0.96484375, "step": 174 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 1.7371069238880654e-06, "aux_brier/mean_group_std": 0.05884674309013523, "aux_brier/mean_r": 0.9630362532326565, "aux_brier/n_active_tok": 227.75, "aux_brier/n_groups": 14.96875, "aux_brier/n_step_records": 56.9375, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5827347879116411, "calib/avg_num_step_conf": 7.3828125, "calib/ece": 0.3154701195219124, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00540852419026968, "calib/mean_conf": 0.05903187250996017, "calib/mu_c": 0.062414893617021266, "calib/mu_w": 0.05700636942675159, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.01866391206647686, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2398.0, "completions/max_terminated_length": 2398.0, "completions/mean_length": 458.15625, "completions/mean_terminated_length": 459.9529724121094, "completions/min_length": 0.0, "completions/min_terminated_length": 118.0, "epoch": 0.18666666666666668, "grad_norm": 0.07808632403612137, "learning_rate": 6.944444444444446e-07, "loss": -0.0022, "num_tokens": 38452149.0, "reward": 1.0183327198028564, "reward_std": 0.2497301697731018, "rewards/accuracy_reward_step": 0.3671875, "rewards/final_brier_reward_step": 0.6514562368392944, "rewards/format_reward_step": 0.9765625, "step": 175 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -1.5985905837956338e-06, "aux_brier/mean_group_std": 0.0884663780686098, "aux_brier/mean_r": 0.9432563578126447, "aux_brier/n_active_tok": 237.875, "aux_brier/n_groups": 13.84375, "aux_brier/n_step_records": 59.46875, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5465293668954996, "calib/avg_num_step_conf": 7.44140625, "calib/ece": 0.39305555555555555, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00283371472158659, "calib/mean_conf": 0.05932539682539682, "calib/mu_c": 0.060877192982456155, "calib/mu_w": 0.058043478260869565, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.018514828619535843, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2286.0, "completions/max_terminated_length": 2286.0, "completions/mean_length": 443.7734375, "completions/mean_terminated_length": 447.2677001953125, "completions/min_length": 0.0, "completions/min_terminated_length": 151.0, "epoch": 0.18773333333333334, "grad_norm": 0.06450923532247543, "learning_rate": 6.666666666666667e-07, "loss": -0.0179, "num_tokens": 38669819.0, "reward": 1.0819435119628906, "reward_std": 0.21873226761817932, "rewards/accuracy_reward_step": 0.4453125, "rewards/final_brier_reward_step": 0.5855870842933655, "rewards/format_reward_step": 0.98046875, "step": 176 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -5.160022302330702e-06, "aux_brier/mean_group_std": 0.05379418406876082, "aux_brier/mean_r": 0.96433970289857, "aux_brier/n_active_tok": 254.25, "aux_brier/n_groups": 16.15625, "aux_brier/n_step_records": 63.5625, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.6234283186958995, "calib/avg_num_step_conf": 7.9453125, "calib/ece": 0.41174603174603175, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.003968253968253968, "calib/gap": 0.012313135780628051, "calib/mean_conf": 0.060476190476190475, "calib/mu_c": 0.06697478991596639, "calib/mu_w": 0.05466165413533834, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.06021597561130154, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3054.0, "completions/max_terminated_length": 3054.0, "completions/mean_length": 480.9765625, "completions/mean_terminated_length": 480.9765625, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 0.1888, "grad_norm": 0.007034329231828451, "learning_rate": 6.388888888888889e-07, "loss": 0.0926, "num_tokens": 38896781.0, "reward": 1.0977587699890137, "reward_std": 0.20862998068332672, "rewards/accuracy_reward_step": 0.46484375, "rewards/final_brier_reward_step": 0.5707226395606995, "rewards/format_reward_step": 0.98046875, "step": 177 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -9.732071380996743e-07, "aux_brier/mean_group_std": 0.06649860599247942, "aux_brier/mean_r": 0.9601318942695777, "aux_brier/n_active_tok": 228.5, "aux_brier/n_groups": 12.90625, "aux_brier/n_step_records": 57.125, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5752877237851662, "calib/avg_num_step_conf": 7.23046875, "calib/ece": 0.48394422310756974, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0031643222506393884, "calib/mean_conf": 0.057888446215139444, "calib/mu_c": 0.05933823529411765, "calib/mu_w": 0.05617391304347826, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.01753549348745854, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2423.0, "completions/max_terminated_length": 2423.0, "completions/mean_length": 431.89453125, "completions/mean_terminated_length": 435.2952880859375, "completions/min_length": 0.0, "completions/min_terminated_length": 125.0, "epoch": 0.18986666666666666, "grad_norm": 0.045542120933532715, "learning_rate": 6.111111111111112e-07, "loss": 0.0288, "num_tokens": 39113418.0, "reward": 1.1467008590698242, "reward_std": 0.2595553398132324, "rewards/accuracy_reward_step": 0.53125, "rewards/final_brier_reward_step": 0.5086784958839417, "rewards/format_reward_step": 0.9765625, "step": 178 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 2.311839944202365e-06, "aux_brier/mean_group_std": 0.055502624836597944, "aux_brier/mean_r": 0.9660979821513774, "aux_brier/n_active_tok": 238.125, "aux_brier/n_groups": 13.5, "aux_brier/n_step_records": 59.53125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.509009009009009, "calib/avg_num_step_conf": 7.46484375, "calib/ece": 0.504110671936759, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0007080319756376033, "calib/mean_conf": 0.057154150197628456, "calib/mu_c": 0.057464788732394356, "calib/mu_w": 0.05675675675675675, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.016267403346250885, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2083.0, "completions/max_terminated_length": 2083.0, "completions/mean_length": 464.83203125, "completions/mean_terminated_length": 464.83203125, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 0.19093333333333334, "grad_norm": 0.009458055719733238, "learning_rate": 5.833333333333334e-07, "loss": 0.0128, "num_tokens": 39338679.0, "reward": 1.1722915172576904, "reward_std": 0.20855097472667694, "rewards/accuracy_reward_step": 0.5546875, "rewards/final_brier_reward_step": 0.493853896856308, "rewards/format_reward_step": 0.98828125, "step": 179 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -1.2989589782408384e-06, "aux_brier/mean_group_std": 0.0686843718934011, "aux_brier/mean_r": 0.9488519305117243, "aux_brier/n_active_tok": 269.625, "aux_brier/n_groups": 15.6875, "aux_brier/n_step_records": 67.40625, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5707132704483784, "calib/avg_num_step_conf": 8.44140625, "calib/ece": 0.5084462151394422, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.00398406374501992, "calib/gap": -0.0028240082698023083, "calib/mean_conf": 0.06509960159362552, "calib/mu_c": 0.06387323943661972, "calib/mu_w": 0.06669724770642203, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.003904382470119522, "calib/std_conf": 0.061094678987928926, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2913.0, "completions/max_terminated_length": 2913.0, "completions/mean_length": 524.140625, "completions/mean_terminated_length": 526.1961059570312, "completions/min_length": 0.0, "completions/min_terminated_length": 157.0, "epoch": 0.192, "grad_norm": 0.0794568806886673, "learning_rate": 5.555555555555555e-07, "loss": 0.048, "num_tokens": 39576715.0, "reward": 1.1642001867294312, "reward_std": 0.24491116404533386, "rewards/accuracy_reward_step": 0.5546875, "rewards/final_brier_reward_step": 0.484925776720047, "rewards/format_reward_step": 0.9765625, "step": 180 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 3.351923299532711e-06, "aux_brier/mean_group_std": 0.04639050225856281, "aux_brier/mean_r": 0.9709694630155832, "aux_brier/n_active_tok": 231.5, "aux_brier/n_groups": 14.03125, "aux_brier/n_step_records": 57.875, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5408491577818435, "calib/avg_num_step_conf": 7.25390625, "calib/ece": 0.41938492063492055, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0004987067062015224, "calib/mean_conf": 0.06077380952380953, "calib/mu_c": 0.061033057851239685, "calib/mu_w": 0.06053435114503816, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.017903412278161224, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2711.0, "completions/max_terminated_length": 2711.0, "completions/mean_length": 442.34765625, "completions/mean_terminated_length": 444.0823669433594, "completions/min_length": 0.0, "completions/min_terminated_length": 164.0, "epoch": 0.19306666666666666, "grad_norm": 0.008770574815571308, "learning_rate": 5.277777777777779e-07, "loss": 0.066, "num_tokens": 39796220.0, "reward": 1.1041061878204346, "reward_std": 0.27390772104263306, "rewards/accuracy_reward_step": 0.47265625, "rewards/final_brier_reward_step": 0.5648627877235413, "rewards/format_reward_step": 0.98046875, "step": 181 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -3.4054107825864932e-06, "aux_brier/mean_group_std": 0.06660856474245658, "aux_brier/mean_r": 0.9614253927732934, "aux_brier/n_active_tok": 255.625, "aux_brier/n_groups": 15.34375, "aux_brier/n_step_records": 63.90625, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.579108617131063, "calib/avg_num_step_conf": 7.98828125, "calib/ece": 0.539763779527559, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.004971620227038204, "calib/mean_conf": 0.05866141732283465, "calib/mu_c": 0.06065789473684211, "calib/mu_w": 0.05568627450980391, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.015616577338039712, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2135.0, "completions/max_terminated_length": 2135.0, "completions/mean_length": 478.85546875, "completions/mean_terminated_length": 478.85546875, "completions/min_length": 177.0, "completions/min_terminated_length": 177.0, "epoch": 0.19413333333333332, "grad_norm": 0.007696555461734533, "learning_rate": 5.000000000000001e-07, "loss": 0.0217, "num_tokens": 40024967.0, "reward": 1.2044800519943237, "reward_std": 0.18606093525886536, "rewards/accuracy_reward_step": 0.59375, "rewards/final_brier_reward_step": 0.4663578271865845, "rewards/format_reward_step": 0.98828125, "step": 182 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -6.094010174262365e-07, "aux_brier/mean_group_std": 0.05322795634776855, "aux_brier/mean_r": 0.9653928837478363, "aux_brier/n_active_tok": 257.25, "aux_brier/n_groups": 16.375, "aux_brier/n_step_records": 64.3125, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5117647058823529, "calib/avg_num_step_conf": 8.1953125, "calib/ece": 0.4218072289156627, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0015061409179056226, "calib/mean_conf": 0.056104417670682735, "calib/mu_c": 0.056890756302521, "calib/mu_w": 0.05538461538461538, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.016637076359789144, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2666.0, "completions/max_terminated_length": 2666.0, "completions/mean_length": 516.234375, "completions/mean_terminated_length": 520.2991943359375, "completions/min_length": 0.0, "completions/min_terminated_length": 172.0, "epoch": 0.1952, "grad_norm": 0.2508448362350464, "learning_rate": 4.7222222222222226e-07, "loss": 0.014, "num_tokens": 40263803.0, "reward": 1.0914976596832275, "reward_std": 0.29345816373825073, "rewards/accuracy_reward_step": 0.46875, "rewards/final_brier_reward_step": 0.5534909963607788, "rewards/format_reward_step": 0.96875, "step": 183 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 5.572338486192763e-06, "aux_brier/mean_group_std": 0.06606226210886477, "aux_brier/mean_r": 0.9525124889424592, "aux_brier/n_active_tok": 250.875, "aux_brier/n_groups": 15.75, "aux_brier/n_step_records": 62.71875, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5611282248748555, "calib/avg_num_step_conf": 7.83984375, "calib/ece": 0.5181818181818181, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.003923758182518285, "calib/mean_conf": 0.06284584980237155, "calib/mu_c": 0.06448979591836734, "calib/mu_w": 0.06056603773584906, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.020560427486509943, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2429.0, "completions/max_terminated_length": 2429.0, "completions/mean_length": 465.7734375, "completions/mean_terminated_length": 465.7734375, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 0.19626666666666667, "grad_norm": 0.008814026601612568, "learning_rate": 4.444444444444445e-07, "loss": 0.0705, "num_tokens": 40488321.0, "reward": 1.1893103122711182, "reward_std": 0.25937968492507935, "rewards/accuracy_reward_step": 0.57421875, "rewards/final_brier_reward_step": 0.483803927898407, "rewards/format_reward_step": 0.98828125, "step": 184 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -1.1588724361244473e-06, "aux_brier/mean_group_std": 0.0680926022584669, "aux_brier/mean_r": 0.9577335660485276, "aux_brier/n_active_tok": 266.125, "aux_brier/n_groups": 18.21875, "aux_brier/n_step_records": 66.53125, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5301294982755255, "calib/avg_num_step_conf": 8.625, "calib/ece": 0.45564516129032256, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0006962972603631185, "calib/mean_conf": 0.05895161290322582, "calib/mu_c": 0.059291338582677176, "calib/mu_w": 0.05859504132231406, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.00125, "calib/std_conf": 0.02424067989794577, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2904.0, "completions/max_terminated_length": 2904.0, "completions/mean_length": 486.45703125, "completions/mean_terminated_length": 490.28741455078125, "completions/min_length": 0.0, "completions/min_terminated_length": 180.0, "epoch": 0.19733333333333333, "grad_norm": 0.06516099721193314, "learning_rate": 4.1666666666666667e-07, "loss": 0.0267, "num_tokens": 40719774.0, "reward": 1.1123558282852173, "reward_std": 0.2039007544517517, "rewards/accuracy_reward_step": 0.49609375, "rewards/final_brier_reward_step": 0.5275484323501587, "rewards/format_reward_step": 0.96875, "step": 185 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -1.4122980576125599e-06, "aux_brier/mean_group_std": 0.05753200831765791, "aux_brier/mean_r": 0.9665061358973027, "aux_brier/n_active_tok": 241.625, "aux_brier/n_groups": 14.59375, "aux_brier/n_step_records": 60.40625, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5528299555116164, "calib/avg_num_step_conf": 7.55078125, "calib/ece": 0.4765490196078431, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0023739495798319465, "calib/mean_conf": 0.05874509803921569, "calib/mu_c": 0.059852941176470595, "calib/mu_w": 0.05747899159663865, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.000980392156862745, "calib/std_conf": 0.02178310474751863, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2429.0, "completions/max_terminated_length": 2429.0, "completions/mean_length": 467.29296875, "completions/mean_terminated_length": 467.29296875, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 0.1984, "grad_norm": 0.009285667911171913, "learning_rate": 3.8888888888888895e-07, "loss": 0.0191, "num_tokens": 40944441.0, "reward": 1.1604286432266235, "reward_std": 0.22991374135017395, "rewards/accuracy_reward_step": 0.53125, "rewards/final_brier_reward_step": 0.5245273113250732, "rewards/format_reward_step": 0.99609375, "step": 186 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 4.875297361373043e-07, "aux_brier/mean_group_std": 0.05111810074396666, "aux_brier/mean_r": 0.9716391110350127, "aux_brier/n_active_tok": 291.75, "aux_brier/n_groups": 22.875, "aux_brier/n_step_records": 72.9375, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5274491094147583, "calib/avg_num_step_conf": 9.12109375, "calib/ece": 0.46223107569721117, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0016469465648855097, "calib/mean_conf": 0.062390438247011966, "calib/mu_c": 0.061603053435114505, "calib/mu_w": 0.06325000000000001, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.001354581673306773, "calib/std_conf": 0.025213489800807613, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2771.0, "completions/max_terminated_length": 2771.0, "completions/mean_length": 499.36328125, "completions/mean_terminated_length": 503.2952880859375, "completions/min_length": 0.0, "completions/min_terminated_length": 161.0, "epoch": 0.19946666666666665, "grad_norm": 0.0630650669336319, "learning_rate": 3.611111111111111e-07, "loss": 0.006, "num_tokens": 41173822.0, "reward": 1.1337924003601074, "reward_std": 0.2610511779785156, "rewards/accuracy_reward_step": 0.51171875, "rewards/final_brier_reward_step": 0.5273569822311401, "rewards/format_reward_step": 0.98046875, "step": 187 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 3.1933831217134667e-06, "aux_brier/mean_group_std": 0.0658192235539247, "aux_brier/mean_r": 0.959594664071407, "aux_brier/n_active_tok": 255.625, "aux_brier/n_groups": 16.90625, "aux_brier/n_step_records": 63.90625, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5583759718253705, "calib/avg_num_step_conf": 8.32421875, "calib/ece": 0.5389079999999999, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0030518306864243602, "calib/mean_conf": 0.06309200000000001, "calib/mu_c": 0.06185906040268455, "calib/mu_w": 0.06491089108910891, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.003, "calib/std_conf": 0.048295543645350957, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3059.0, "completions/max_terminated_length": 3059.0, "completions/mean_length": 512.6796875, "completions/mean_terminated_length": 514.6902465820312, "completions/min_length": 0.0, "completions/min_terminated_length": 149.0, "epoch": 0.20053333333333334, "grad_norm": 0.020949432626366615, "learning_rate": 3.3333333333333335e-07, "loss": 0.0827, "num_tokens": 41409140.0, "reward": 1.1771697998046875, "reward_std": 0.21333910524845123, "rewards/accuracy_reward_step": 0.58203125, "rewards/final_brier_reward_step": 0.4508669376373291, "rewards/format_reward_step": 0.96484375, "step": 188 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -4.662979643244025e-07, "aux_brier/mean_group_std": 0.06215085901039336, "aux_brier/mean_r": 0.9365707571128276, "aux_brier/n_active_tok": 234.0, "aux_brier/n_groups": 13.28125, "aux_brier/n_step_records": 58.5, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6068052930056712, "calib/avg_num_step_conf": 7.3125, "calib/ece": 0.48778656126482206, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.004202898550724622, "calib/mean_conf": 0.05924901185770752, "calib/mu_c": 0.06115942028985507, "calib/mu_w": 0.05695652173913045, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0007905138339920949, "calib/std_conf": 0.01812967260233465, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1738.0, "completions/max_terminated_length": 1738.0, "completions/mean_length": 445.37109375, "completions/mean_terminated_length": 445.37109375, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 0.2016, "grad_norm": 0.009667615406215191, "learning_rate": 3.055555555555556e-07, "loss": 0.0073, "num_tokens": 41630923.0, "reward": 1.161043643951416, "reward_std": 0.223192036151886, "rewards/accuracy_reward_step": 0.5390625, "rewards/final_brier_reward_step": 0.5113620758056641, "rewards/format_reward_step": 0.98828125, "step": 189 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -1.4906425744865004e-06, "aux_brier/mean_group_std": 0.06953513418144637, "aux_brier/mean_r": 0.957203299778975, "aux_brier/n_active_tok": 278.25, "aux_brier/n_groups": 15.4375, "aux_brier/n_step_records": 69.5625, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.6428019323671498, "calib/avg_num_step_conf": 9.20703125, "calib/ece": 0.4777199999999999, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.004, "calib/gap": 0.015977455716586146, "calib/mean_conf": 0.06228, "calib/mu_c": 0.06962962962962962, "calib/mu_w": 0.05365217391304348, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.06275031155301143, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2023.0, "completions/max_terminated_length": 2023.0, "completions/mean_length": 510.2109375, "completions/mean_terminated_length": 518.3095703125, "completions/min_length": 0.0, "completions/min_terminated_length": 180.0, "epoch": 0.20266666666666666, "grad_norm": 0.017394818365573883, "learning_rate": 2.7777777777777776e-07, "loss": -0.0192, "num_tokens": 41867145.0, "reward": 1.148287057876587, "reward_std": 0.2366393804550171, "rewards/accuracy_reward_step": 0.53125, "rewards/final_brier_reward_step": 0.5150230526924133, "rewards/format_reward_step": 0.9765625, "step": 190 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -2.85897069242369e-06, "aux_brier/mean_group_std": 0.0496645300252347, "aux_brier/mean_r": 0.9716731173971757, "aux_brier/n_active_tok": 270.75, "aux_brier/n_groups": 18.25, "aux_brier/n_step_records": 67.6875, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5701904761904762, "calib/avg_num_step_conf": 8.51171875, "calib/ece": 0.4376494023904382, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.008969523809523816, "calib/mean_conf": 0.06434262948207173, "calib/mu_c": 0.06880952380952382, "calib/mu_w": 0.059840000000000004, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.03304991600091439, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 3015.0, "completions/max_terminated_length": 3015.0, "completions/mean_length": 449.8984375, "completions/mean_terminated_length": 453.4409484863281, "completions/min_length": 0.0, "completions/min_terminated_length": 117.0, "epoch": 0.20373333333333332, "grad_norm": 0.036954063922166824, "learning_rate": 2.5000000000000004e-07, "loss": 0.0236, "num_tokens": 42086487.0, "reward": 1.1174577474594116, "reward_std": 0.21388424932956696, "rewards/accuracy_reward_step": 0.4921875, "rewards/final_brier_reward_step": 0.5479558706283569, "rewards/format_reward_step": 0.9765625, "step": 191 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 1.8904280134623974e-06, "aux_brier/mean_group_std": 0.06223107335754051, "aux_brier/mean_r": 0.9547934977486502, "aux_brier/n_active_tok": 262.375, "aux_brier/n_groups": 18.125, "aux_brier/n_step_records": 65.59375, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.6210419130206281, "calib/avg_num_step_conf": 8.609375, "calib/ece": 0.42072874493927126, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.008008146104322704, "calib/mean_conf": 0.05700404858299596, "calib/mu_c": 0.06118644067796611, "calib/mu_w": 0.053178294573643405, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.019297688807874764, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2921.0, "completions/max_terminated_length": 2921.0, "completions/mean_length": 510.125, "completions/mean_terminated_length": 518.2222290039062, "completions/min_length": 0.0, "completions/min_terminated_length": 153.0, "epoch": 0.2048, "grad_norm": 0.04598285257816315, "learning_rate": 2.2222222222222224e-07, "loss": 0.0224, "num_tokens": 42322055.0, "reward": 1.086470127105713, "reward_std": 0.23417028784751892, "rewards/accuracy_reward_step": 0.46484375, "rewards/final_brier_reward_step": 0.5568180084228516, "rewards/format_reward_step": 0.96484375, "step": 192 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -9.376260637905531e-07, "aux_brier/mean_group_std": 0.04323480778009911, "aux_brier/mean_r": 0.9706668702024487, "aux_brier/n_active_tok": 289.875, "aux_brier/n_groups": 19.59375, "aux_brier/n_step_records": 72.46875, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5318066157760815, "calib/avg_num_step_conf": 9.2890625, "calib/ece": 0.4664112903225806, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0018170548704899842, "calib/mean_conf": 0.06181451612903227, "calib/mu_c": 0.06267175572519083, "calib/mu_w": 0.06085470085470085, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0, "calib/std_conf": 0.02339039882139536, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2701.0, "completions/max_terminated_length": 2701.0, "completions/mean_length": 506.421875, "completions/mean_terminated_length": 512.4268798828125, "completions/min_length": 0.0, "completions/min_terminated_length": 176.0, "epoch": 0.20586666666666667, "grad_norm": 0.047434333711862564, "learning_rate": 1.9444444444444447e-07, "loss": 0.075, "num_tokens": 42557411.0, "reward": 1.122399091720581, "reward_std": 0.30837562680244446, "rewards/accuracy_reward_step": 0.51171875, "rewards/final_brier_reward_step": 0.5130344033241272, "rewards/format_reward_step": 0.96484375, "step": 193 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -3.8079859654427395e-07, "aux_brier/mean_group_std": 0.07902714387319085, "aux_brier/mean_r": 0.9493729210776587, "aux_brier/n_active_tok": 221.0, "aux_brier/n_groups": 11.375, "aux_brier/n_step_records": 55.25, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.6139731827805222, "calib/avg_num_step_conf": 7.15234375, "calib/ece": 0.5078968253968255, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.009739526528517353, "calib/mean_conf": 0.05956349206349207, "calib/mu_c": 0.06377622377622377, "calib/mu_w": 0.05403669724770642, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.03417113699715753, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2643.0, "completions/max_terminated_length": 2643.0, "completions/mean_length": 415.90234375, "completions/mean_terminated_length": 419.1771545410156, "completions/min_length": 0.0, "completions/min_terminated_length": 170.0, "epoch": 0.20693333333333333, "grad_norm": 0.04716826602816582, "learning_rate": 1.6666666666666668e-07, "loss": 0.0392, "num_tokens": 42769826.0, "reward": 1.1682668924331665, "reward_std": 0.2654598355293274, "rewards/accuracy_reward_step": 0.55859375, "rewards/final_brier_reward_step": 0.485567569732666, "rewards/format_reward_step": 0.9765625, "step": 194 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 1.943249178779083e-06, "aux_brier/mean_group_std": 0.06785813019765813, "aux_brier/mean_r": 0.9545375591300149, "aux_brier/n_active_tok": 236.125, "aux_brier/n_groups": 13.46875, "aux_brier/n_step_records": 59.03125, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6068923076923077, "calib/avg_num_step_conf": 7.47265625, "calib/ece": 0.4565098039215686, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.00392156862745098, "calib/gap": 0.00082153846153845, "calib/mean_conf": 0.06105882352941176, "calib/mu_c": 0.061461538461538456, "calib/mu_w": 0.060640000000000006, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0038823529411764705, "calib/std_conf": 0.061248901315382034, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1289.0, "completions/max_terminated_length": 1289.0, "completions/mean_length": 453.4296875, "completions/mean_terminated_length": 455.2078552246094, "completions/min_length": 0.0, "completions/min_terminated_length": 151.0, "epoch": 0.208, "grad_norm": 0.01760878600180149, "learning_rate": 1.3888888888888888e-07, "loss": 0.0039, "num_tokens": 42991888.0, "reward": 1.139699935913086, "reward_std": 0.23205438256263733, "rewards/accuracy_reward_step": 0.5078125, "rewards/final_brier_reward_step": 0.5431749820709229, "rewards/format_reward_step": 0.9921875, "step": 195 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -3.2844297479517337e-06, "aux_brier/mean_group_std": 0.06294050563975621, "aux_brier/mean_r": 0.9594141956263865, "aux_brier/n_active_tok": 220.5, "aux_brier/n_groups": 11.21875, "aux_brier/n_step_records": 55.125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5544950544950545, "calib/avg_num_step_conf": 7.08203125, "calib/ece": 0.5030314960629921, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.003930573930573902, "calib/mean_conf": 0.05996062992125985, "calib/mu_c": 0.06167832167832166, "calib/mu_w": 0.057747747747747755, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.016848047890287928, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1180.0, "completions/max_terminated_length": 1180.0, "completions/mean_length": 384.16015625, "completions/mean_terminated_length": 387.1850280761719, "completions/min_length": 0.0, "completions/min_terminated_length": 132.0, "epoch": 0.20906666666666668, "grad_norm": 0.019564451649785042, "learning_rate": 1.1111111111111112e-07, "loss": -0.0211, "num_tokens": 43192777.0, "reward": 1.1793503761291504, "reward_std": 0.18318453431129456, "rewards/accuracy_reward_step": 0.55859375, "rewards/final_brier_reward_step": 0.4986511766910553, "rewards/format_reward_step": 0.9921875, "step": 196 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -1.362222672551816e-06, "aux_brier/mean_group_std": 0.08313946287186962, "aux_brier/mean_r": 0.9432585961029891, "aux_brier/n_active_tok": 259.5, "aux_brier/n_groups": 16.59375, "aux_brier/n_step_records": 64.875, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5997772536687631, "calib/avg_num_step_conf": 8.5390625, "calib/ece": 0.36132000000000003, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.004682914046121568, "calib/mean_conf": 0.06268000000000001, "calib/mu_c": 0.06537735849056603, "calib/mu_w": 0.06069444444444446, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.01942209051569887, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2798.0, "completions/max_terminated_length": 2798.0, "completions/mean_length": 446.7109375, "completions/mean_terminated_length": 452.0079345703125, "completions/min_length": 0.0, "completions/min_terminated_length": 174.0, "epoch": 0.21013333333333334, "grad_norm": 0.02286953665316105, "learning_rate": 8.333333333333334e-08, "loss": 0.0234, "num_tokens": 43412191.0, "reward": 1.055452585220337, "reward_std": 0.2655121088027954, "rewards/accuracy_reward_step": 0.4140625, "rewards/final_brier_reward_step": 0.6124355792999268, "rewards/format_reward_step": 0.9765625, "step": 197 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -1.6347661127014135e-06, "aux_brier/mean_group_std": 0.055640640710336864, "aux_brier/mean_r": 0.9689238525694506, "aux_brier/n_active_tok": 252.375, "aux_brier/n_groups": 15.5, "aux_brier/n_step_records": 63.09375, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.6123111582295255, "calib/avg_num_step_conf": 8.05078125, "calib/ece": 0.5529761904761904, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00546382189239332, "calib/mean_conf": 0.058134920634920643, "calib/mu_c": 0.06025974025974027, "calib/mu_w": 0.05479591836734695, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.015510290068518198, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2896.0, "completions/max_terminated_length": 2896.0, "completions/mean_length": 456.9375, "completions/mean_terminated_length": 460.5354309082031, "completions/min_length": 0.0, "completions/min_terminated_length": 144.0, "epoch": 0.2112, "grad_norm": 0.015106615610420704, "learning_rate": 5.555555555555556e-08, "loss": 0.0456, "num_tokens": 43634551.0, "reward": 1.2046021223068237, "reward_std": 0.23903879523277283, "rewards/accuracy_reward_step": 0.6015625, "rewards/final_brier_reward_step": 0.4512211084365845, "rewards/format_reward_step": 0.98046875, "step": 198 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": -1.486556655438953e-06, "aux_brier/mean_group_std": 0.06384287553559435, "aux_brier/mean_r": 0.9551035533752109, "aux_brier/n_active_tok": 250.875, "aux_brier/n_groups": 13.4375, "aux_brier/n_step_records": 62.71875, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5826656243892907, "calib/avg_num_step_conf": 8.23046875, "calib/ece": 0.4610725806451613, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0026099928343430376, "calib/mean_conf": 0.05908870967741936, "calib/mu_c": 0.060341085271317825, "calib/mu_w": 0.05773109243697479, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.017337965100291862, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2482.0, "completions/max_terminated_length": 2482.0, "completions/mean_length": 499.125, "completions/mean_terminated_length": 505.0434875488281, "completions/min_length": 0.0, "completions/min_terminated_length": 116.0, "epoch": 0.21226666666666666, "grad_norm": 0.1626821905374527, "learning_rate": 2.777777777777778e-08, "loss": 0.0458, "num_tokens": 43866527.0, "reward": 1.1197582483291626, "reward_std": 0.3326494097709656, "rewards/accuracy_reward_step": 0.5078125, "rewards/final_brier_reward_step": 0.5180955529212952, "rewards/format_reward_step": 0.96484375, "step": 199 }, { "aux_brier/lambda": 0.25, "aux_brier/loss": 2.888534441636281e-07, "aux_brier/mean_group_std": 0.05886517239074551, "aux_brier/mean_r": 0.9617580087783129, "aux_brier/n_active_tok": 239.0, "aux_brier/n_groups": 14.5, "aux_brier/n_step_records": 59.75, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6468071519795658, "calib/avg_num_step_conf": 7.6953125, "calib/ece": 0.5157312252964427, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0078882503192848, "calib/mean_conf": 0.057391304347826085, "calib/mu_c": 0.06075862068965517, "calib/mu_w": 0.05287037037037037, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.02087779811258606, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1631.0, "completions/max_terminated_length": 1631.0, "completions/mean_length": 469.28515625, "completions/mean_terminated_length": 472.9803161621094, "completions/min_length": 0.0, "completions/min_terminated_length": 115.0, "epoch": 0.21333333333333335, "grad_norm": 0.24779270589351654, "learning_rate": 0.0, "loss": 0.0147, "num_tokens": 44094712.0, "reward": 1.1823010444641113, "reward_std": 0.2039198875427246, "rewards/accuracy_reward_step": 0.56640625, "rewards/final_brier_reward_step": 0.4870171844959259, "rewards/format_reward_step": 0.98828125, "step": 200 }, { "epoch": 0.21333333333333335, "step": 200, "total_flos": 0.0, "train_loss": 0.03735336397076026, "train_runtime": 15822.0969, "train_samples_per_second": 3.236, "train_steps_per_second": 0.013 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 44094712, "num_train_epochs": 1, "save_steps": 40, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }