{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.21333333333333335, "eval_steps": 500, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "aux_brier/lambda": 0.5, "aux_brier/loss": -9.102586197069666e-06, "aux_brier/mean_r": 0.4665906016643231, "aux_brier/n_active_tok": 24.615384615384617, "aux_brier/n_step_records": 6.153846153846154, "aux_brier/std_r": 0.17818317848902482, "calib/answer_extract_rate": 0.08203125, "calib/auroc": 0.6944444444444445, "calib/avg_num_step_conf": 0.3359375, "calib/ece": 0.6230769230769231, "calib/final_conf_rate": 0.05078125, "calib/format_rate": 0.04296875, "calib/frac_conf_gt_0.9": 0.7692307692307693, "calib/gap": 0.03861111111111115, "calib/mean_conf": 0.9307692307692309, "calib/mu_c": 0.9575, "calib/mu_w": 0.9188888888888889, "calib/nonempty_final_conf_rate": 0.05078125, "calib/nonempty_reasoning_rate": 0.09765625, "calib/nonempty_step_conf_rate": 0.0703125, "calib/pce": 0.6230769230769231, "calib/std_conf": 0.07965903671384378, "calib/step_conf_rate": 0.0703125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08984375, "completions/max_length": 2955.0, "completions/max_terminated_length": 2955.0, "completions/mean_length": 613.67578125, "completions/mean_terminated_length": 674.2532348632812, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0010666666666666667, "grad_norm": 2.8844168186187744, "learning_rate": 2.5000000000000004e-07, "loss": 0.0318, "num_tokens": 264685.0, "reward": 0.04124843701720238, "reward_std": 0.0838509351015091, "rewards/accuracy_reward_step": 0.015625, "rewards/final_brier_reward_step": 0.01655624993145466, "rewards/format_reward_step": 0.04296875, "step": 1 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 1.2052168732018822e-07, "aux_brier/mean_r": 0.43024390700616333, "aux_brier/n_active_tok": 28.42105263157895, "aux_brier/n_step_records": 7.105263157894737, "aux_brier/std_r": 0.16724381615456782, "calib/answer_extract_rate": 0.13671875, "calib/auroc": 0.5338345864661654, "calib/avg_num_step_conf": 0.55078125, "calib/ece": 0.6261538461538463, "calib/final_conf_rate": 0.1015625, "calib/format_rate": 0.08984375, "calib/frac_conf_gt_0.9": 0.7692307692307693, "calib/gap": 0.002406015037593856, "calib/mean_conf": 0.8953846153846153, "calib/mu_c": 0.897142857142857, "calib/mu_w": 0.8947368421052632, "calib/nonempty_final_conf_rate": 0.1015625, "calib/nonempty_reasoning_rate": 0.14453125, "calib/nonempty_step_conf_rate": 0.109375, "calib/pce": 0.6261538461538463, "calib/std_conf": 0.18653172073466937, "calib/step_conf_rate": 0.109375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0546875, "completions/max_length": 3001.0, "completions/max_terminated_length": 3001.0, "completions/mean_length": 646.4609375, "completions/mean_terminated_length": 683.8594970703125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0021333333333333334, "grad_norm": 0.7005276083946228, "learning_rate": 5.000000000000001e-07, "loss": 0.0643, "num_tokens": 533467.0, "reward": 0.08358447253704071, "reward_std": 0.15892045199871063, "rewards/accuracy_reward_step": 0.03125, "rewards/final_brier_reward_step": 0.02965039201080799, "rewards/format_reward_step": 0.08984375, "step": 2 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -9.167067283044616e-09, "aux_brier/mean_r": 0.45051724314689634, "aux_brier/n_active_tok": 23.733333333333334, "aux_brier/n_step_records": 5.933333333333334, "aux_brier/std_r": 0.1999872013926506, "calib/answer_extract_rate": 0.06640625, "calib/auroc": 0.8333333333333334, "calib/avg_num_step_conf": 0.34765625, "calib/ece": 0.6871428571428569, "calib/final_conf_rate": 0.0546875, "calib/format_rate": 0.0390625, "calib/frac_conf_gt_0.9": 0.6428571428571429, "calib/gap": 0.17500000000000016, "calib/mean_conf": 0.8299999999999998, "calib/mu_c": 0.98, "calib/mu_w": 0.8049999999999998, "calib/nonempty_final_conf_rate": 0.0546875, "calib/nonempty_reasoning_rate": 0.08984375, "calib/nonempty_step_conf_rate": 0.06640625, "calib/pce": 0.6871428571428569, "calib/std_conf": 0.27105613546169466, "calib/step_conf_rate": 0.06640625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09765625, "completions/max_length": 2883.0, "completions/max_terminated_length": 2883.0, "completions/mean_length": 647.57421875, "completions/mean_terminated_length": 717.6580200195312, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0032, "grad_norm": 0.8750516176223755, "learning_rate": 7.5e-07, "loss": 0.0077, "num_tokens": 804502.0, "reward": 0.03048769384622574, "reward_std": 0.07659061253070831, "rewards/accuracy_reward_step": 0.0078125, "rewards/final_brier_reward_step": 0.012575780972838402, "rewards/format_reward_step": 0.0390625, "step": 3 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -9.423194446459401e-06, "aux_brier/mean_r": 0.4603502447051661, "aux_brier/n_active_tok": 21.142857142857142, "aux_brier/n_step_records": 5.285714285714286, "aux_brier/std_r": 0.17281669192016125, "calib/answer_extract_rate": 0.06640625, "calib/avg_num_step_conf": 0.29296875, "calib/ece": 0.9033333333333331, "calib/final_conf_rate": 0.046875, "calib/format_rate": 0.04296875, "calib/frac_conf_gt_0.9": 0.8333333333333334, "calib/mean_conf": 0.9033333333333332, "calib/mu_c": NaN, "calib/mu_w": 0.9033333333333332, "calib/nonempty_final_conf_rate": 0.046875, "calib/nonempty_reasoning_rate": 0.08203125, "calib/nonempty_step_conf_rate": 0.06640625, "calib/pce": 0.9033333333333331, "calib/std_conf": 0.2013427150132055, "calib/step_conf_rate": 0.06640625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05859375, "completions/max_length": 2907.0, "completions/max_terminated_length": 2907.0, "completions/mean_length": 756.328125, "completions/mean_terminated_length": 803.4025268554688, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.004266666666666667, "grad_norm": 0.12523384392261505, "learning_rate": 1.0000000000000002e-06, "loss": 0.012, "num_tokens": 1104290.0, "reward": 0.026995116844773293, "reward_std": 0.05869061499834061, "rewards/accuracy_reward_step": 0.00390625, "rewards/final_brier_reward_step": 0.006417968776077032, "rewards/format_reward_step": 0.04296875, "step": 4 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 2.0414590718540721e-07, "aux_brier/mean_r": 0.42980177174596224, "aux_brier/n_active_tok": 23.529411764705884, "aux_brier/n_step_records": 5.882352941176471, "aux_brier/std_r": 0.15063537394299226, "calib/answer_extract_rate": 0.1015625, "calib/auroc": 0.2976190476190476, "calib/avg_num_step_conf": 0.390625, "calib/ece": 0.6139999999999999, "calib/final_conf_rate": 0.078125, "calib/format_rate": 0.06640625, "calib/frac_conf_gt_0.9": 0.75, "calib/gap": 0.003809523809523596, "calib/mean_conf": 0.914, "calib/mu_c": 0.9166666666666666, "calib/mu_w": 0.912857142857143, "calib/nonempty_final_conf_rate": 0.078125, "calib/nonempty_reasoning_rate": 0.1171875, "calib/nonempty_step_conf_rate": 0.08203125, "calib/pce": 0.6139999999999999, "calib/std_conf": 0.16545089906071833, "calib/step_conf_rate": 0.08203125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 3005.0, "completions/max_terminated_length": 3005.0, "completions/mean_length": 645.640625, "completions/mean_terminated_length": 737.8750610351562, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.005333333333333333, "grad_norm": 2.265652894973755, "learning_rate": 1.25e-06, "loss": 0.0522, "num_tokens": 1376262.0, "reward": 0.06221386417746544, "reward_std": 0.1284206509590149, "rewards/accuracy_reward_step": 0.0234375, "rewards/final_brier_reward_step": 0.02229296788573265, "rewards/format_reward_step": 0.06640625, "step": 5 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -7.246716573927614e-07, "aux_brier/mean_r": 0.4946602717041969, "aux_brier/n_active_tok": 21.6, "aux_brier/n_step_records": 5.4, "aux_brier/std_r": 0.147001376375556, "calib/answer_extract_rate": 0.08203125, "calib/auroc": 0.736842105263158, "calib/avg_num_step_conf": 0.421875, "calib/ece": 0.9035, "calib/final_conf_rate": 0.078125, "calib/format_rate": 0.0625, "calib/frac_conf_gt_0.9": 0.8, "calib/gap": 0.03842105263157902, "calib/mean_conf": 0.9534999999999998, "calib/mu_c": 0.99, "calib/mu_w": 0.951578947368421, "calib/nonempty_final_conf_rate": 0.078125, "calib/nonempty_reasoning_rate": 0.125, "calib/nonempty_step_conf_rate": 0.10546875, "calib/pce": 0.9035, "calib/std_conf": 0.04932291556670184, "calib/step_conf_rate": 0.10546875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09375, "completions/max_length": 2896.0, "completions/max_terminated_length": 2896.0, "completions/mean_length": 600.02734375, "completions/mean_terminated_length": 662.09912109375, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0064, "grad_norm": 5.252373218536377, "learning_rate": 1.5e-06, "loss": 0.0485, "num_tokens": 1635821.0, "reward": 0.037501074373722076, "reward_std": 0.08939608931541443, "rewards/accuracy_reward_step": 0.00390625, "rewards/final_brier_reward_step": 0.009379297494888306, "rewards/format_reward_step": 0.0625, "step": 6 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 1.3156230124378165e-05, "aux_brier/mean_r": 0.4707753310600917, "aux_brier/n_active_tok": 32.666666666666664, "aux_brier/n_step_records": 8.166666666666666, "aux_brier/std_r": 0.15070005040615797, "calib/answer_extract_rate": 0.0625, "calib/auroc": 0.4, "calib/avg_num_step_conf": 0.40234375, "calib/ece": 0.43000000000000005, "calib/final_conf_rate": 0.03515625, "calib/format_rate": 0.03125, "calib/frac_conf_gt_0.9": 0.6666666666666666, "calib/gap": -0.05049999999999999, "calib/mean_conf": 0.8744444444444445, "calib/mu_c": 0.852, "calib/mu_w": 0.9025, "calib/nonempty_final_conf_rate": 0.03515625, "calib/nonempty_reasoning_rate": 0.08984375, "calib/nonempty_step_conf_rate": 0.05859375, "calib/pce": 0.37444444444444447, "calib/std_conf": 0.1442306109527463, "calib/step_conf_rate": 0.05859375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0859375, "completions/max_length": 3032.0, "completions/max_terminated_length": 3032.0, "completions/mean_length": 727.65625, "completions/mean_terminated_length": 796.0684204101562, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.007466666666666667, "grad_norm": 0.7824645638465881, "learning_rate": 1.75e-06, "loss": -0.0, "num_tokens": 1929525.0, "reward": 0.04040282964706421, "reward_std": 0.06662891805171967, "rewards/accuracy_reward_step": 0.01953125, "rewards/final_brier_reward_step": 0.020986327901482582, "rewards/format_reward_step": 0.03125, "step": 7 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -1.1616804963310301e-05, "aux_brier/mean_r": 0.48305028676986694, "aux_brier/n_active_tok": 37.23076923076923, "aux_brier/n_step_records": 9.307692307692308, "aux_brier/std_r": 0.1861779227596204, "calib/answer_extract_rate": 0.09765625, "calib/auroc": 0.7111111111111111, "calib/avg_num_step_conf": 0.47265625, "calib/ece": 0.36526315789473685, "calib/final_conf_rate": 0.07421875, "calib/format_rate": 0.05859375, "calib/frac_conf_gt_0.9": 0.7894736842105263, "calib/gap": 0.14655555555555566, "calib/mean_conf": 0.8915789473684211, "calib/mu_c": 0.9610000000000001, "calib/mu_w": 0.8144444444444444, "calib/nonempty_final_conf_rate": 0.07421875, "calib/nonempty_reasoning_rate": 0.1171875, "calib/nonempty_step_conf_rate": 0.0859375, "calib/pce": 0.36526315789473685, "calib/std_conf": 0.21849396373439164, "calib/step_conf_rate": 0.0859375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0703125, "completions/max_length": 2945.0, "completions/max_terminated_length": 2945.0, "completions/mean_length": 656.16796875, "completions/mean_terminated_length": 705.7941284179688, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.008533333333333334, "grad_norm": 3.3313205242156982, "learning_rate": 2.0000000000000003e-06, "loss": 0.0473, "num_tokens": 2204016.0, "reward": 0.07878877222537994, "reward_std": 0.13841167092323303, "rewards/accuracy_reward_step": 0.0390625, "rewards/final_brier_reward_step": 0.04171757772564888, "rewards/format_reward_step": 0.05859375, "step": 8 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 6.838604144933567e-06, "aux_brier/mean_r": 0.43672386397208485, "aux_brier/n_active_tok": 21.714285714285715, "aux_brier/n_step_records": 5.428571428571429, "aux_brier/std_r": 0.11355884306664978, "calib/answer_extract_rate": 0.03515625, "calib/auroc": 0.8333333333333334, "calib/avg_num_step_conf": 0.18359375, "calib/ece": 0.6925, "calib/final_conf_rate": 0.015625, "calib/format_rate": 0.0078125, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.023333333333333317, "calib/mean_conf": 0.9425, "calib/mu_c": 0.96, "calib/mu_w": 0.9366666666666666, "calib/nonempty_final_conf_rate": 0.015625, "calib/nonempty_reasoning_rate": 0.0546875, "calib/nonempty_step_conf_rate": 0.02734375, "calib/pce": 0.6925, "calib/std_conf": 0.0204633819296811, "calib/step_conf_rate": 0.02734375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.12109375, "completions/max_length": 3005.0, "completions/max_terminated_length": 3005.0, "completions/mean_length": 607.3828125, "completions/mean_terminated_length": 691.0667114257812, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0096, "grad_norm": 1.389180302619934, "learning_rate": 2.25e-06, "loss": -0.0, "num_tokens": 2467042.0, "reward": 0.008864061906933784, "reward_std": 0.02507135458290577, "rewards/accuracy_reward_step": 0.00390625, "rewards/final_brier_reward_step": 0.0042062499560415745, "rewards/format_reward_step": 0.0078125, "step": 9 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -2.2531542951398364e-06, "aux_brier/mean_r": 0.39842765033245087, "aux_brier/n_active_tok": 23.428571428571427, "aux_brier/n_step_records": 5.857142857142857, "aux_brier/std_r": 0.1513023983154978, "calib/answer_extract_rate": 0.1171875, "calib/auroc": 0.09999999999999998, "calib/avg_num_step_conf": 0.32421875, "calib/ece": 0.864090909090909, "calib/final_conf_rate": 0.0859375, "calib/format_rate": 0.0625, "calib/frac_conf_gt_0.9": 0.8636363636363636, "calib/gap": -0.04949999999999999, "calib/mean_conf": 0.9550000000000001, "calib/mu_c": 0.91, "calib/mu_w": 0.9595, "calib/nonempty_final_conf_rate": 0.0859375, "calib/nonempty_reasoning_rate": 0.125, "calib/nonempty_step_conf_rate": 0.07421875, "calib/pce": 0.864090909090909, "calib/std_conf": 0.044999999999999984, "calib/step_conf_rate": 0.07421875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 2921.0, "completions/max_terminated_length": 2921.0, "completions/mean_length": 628.734375, "completions/mean_terminated_length": 682.0169677734375, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.010666666666666666, "grad_norm": 0.1392345428466797, "learning_rate": 2.5e-06, "loss": 0.0181, "num_tokens": 2734798.0, "reward": 0.04187284782528877, "reward_std": 0.0920235812664032, "rewards/accuracy_reward_step": 0.0078125, "rewards/final_brier_reward_step": 0.01124140527099371, "rewards/format_reward_step": 0.0625, "step": 10 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 2.0260406202251186e-05, "aux_brier/mean_r": 0.42152037968238193, "aux_brier/n_active_tok": 22.333333333333332, "aux_brier/n_step_records": 5.583333333333333, "aux_brier/std_r": 0.14621165953576565, "calib/answer_extract_rate": 0.0859375, "calib/auroc": 0.0, "calib/avg_num_step_conf": 0.28515625, "calib/ece": 0.8792857142857142, "calib/final_conf_rate": 0.0546875, "calib/format_rate": 0.04296875, "calib/frac_conf_gt_0.9": 0.8571428571428571, "calib/gap": -0.11923076923076947, "calib/mean_conf": 0.9507142857142858, "calib/mu_c": 0.84, "calib/mu_w": 0.9592307692307694, "calib/nonempty_final_conf_rate": 0.0546875, "calib/nonempty_reasoning_rate": 0.1015625, "calib/nonempty_step_conf_rate": 0.0625, "calib/pce": 0.8792857142857142, "calib/std_conf": 0.042167063266129824, "calib/step_conf_rate": 0.0625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 3050.0, "completions/max_terminated_length": 3050.0, "completions/mean_length": 650.1328125, "completions/mean_terminated_length": 743.0089721679688, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.011733333333333333, "grad_norm": 1.3710510730743408, "learning_rate": 2.7500000000000004e-06, "loss": -0.0253, "num_tokens": 3005712.0, "reward": 0.031078124418854713, "reward_std": 0.07516779005527496, "rewards/accuracy_reward_step": 0.0078125, "rewards/final_brier_reward_step": 0.00712500000372529, "rewards/format_reward_step": 0.04296875, "step": 11 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 2.764629376095229e-07, "aux_brier/mean_r": 0.5417146195967992, "aux_brier/n_active_tok": 25.6, "aux_brier/n_step_records": 6.4, "aux_brier/std_r": 0.1558731513718764, "calib/answer_extract_rate": 0.109375, "calib/auroc": 0.5071428571428571, "calib/avg_num_step_conf": 0.375, "calib/ece": 0.5766666666666667, "calib/final_conf_rate": 0.09375, "calib/format_rate": 0.07421875, "calib/frac_conf_gt_0.9": 0.7083333333333334, "calib/gap": -0.10657142857142843, "calib/mean_conf": 0.8341666666666666, "calib/mu_c": 0.772, "calib/mu_w": 0.8785714285714284, "calib/nonempty_final_conf_rate": 0.09375, "calib/nonempty_reasoning_rate": 0.12890625, "calib/nonempty_step_conf_rate": 0.09375, "calib/pce": 0.4970833333333333, "calib/std_conf": 0.2772170729871368, "calib/step_conf_rate": 0.09375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09765625, "completions/max_length": 2996.0, "completions/max_terminated_length": 2996.0, "completions/mean_length": 624.78125, "completions/mean_terminated_length": 692.3982543945312, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0128, "grad_norm": 0.22455056011676788, "learning_rate": 3e-06, "loss": 0.0314, "num_tokens": 3269832.0, "reward": 0.08457626402378082, "reward_std": 0.15153342485427856, "rewards/accuracy_reward_step": 0.0390625, "rewards/final_brier_reward_step": 0.03361757844686508, "rewards/format_reward_step": 0.07421875, "step": 12 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 1.4663040925081593e-06, "aux_brier/mean_r": 0.3994359121118721, "aux_brier/n_active_tok": 20.42105263157895, "aux_brier/n_step_records": 5.105263157894737, "aux_brier/std_r": 0.16137579005015523, "calib/answer_extract_rate": 0.1328125, "calib/auroc": 0.5789473684210527, "calib/avg_num_step_conf": 0.3984375, "calib/ece": 0.5519999999999999, "calib/final_conf_rate": 0.1171875, "calib/format_rate": 0.078125, "calib/frac_conf_gt_0.9": 0.7666666666666667, "calib/gap": 0.07377990430622028, "calib/mean_conf": 0.8959999999999997, "calib/mu_c": 0.9427272727272729, "calib/mu_w": 0.8689473684210526, "calib/nonempty_final_conf_rate": 0.1171875, "calib/nonempty_reasoning_rate": 0.1484375, "calib/nonempty_step_conf_rate": 0.1015625, "calib/pce": 0.5406666666666666, "calib/std_conf": 0.20325025625240095, "calib/step_conf_rate": 0.1015625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.04296875, "completions/max_length": 2994.0, "completions/max_terminated_length": 2994.0, "completions/mean_length": 583.74609375, "completions/mean_terminated_length": 609.955078125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.013866666666666666, "grad_norm": 0.20413072407245636, "learning_rate": 3.2500000000000002e-06, "loss": 0.0414, "num_tokens": 3523863.0, "reward": 0.09119872748851776, "reward_std": 0.17687104642391205, "rewards/accuracy_reward_step": 0.04296875, "rewards/final_brier_reward_step": 0.03666992112994194, "rewards/format_reward_step": 0.078125, "step": 13 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 4.234578524191301e-06, "aux_brier/mean_r": 0.4498485525449117, "aux_brier/n_active_tok": 25.142857142857142, "aux_brier/n_step_records": 6.285714285714286, "aux_brier/std_r": 0.2020464021535147, "calib/answer_extract_rate": 0.12890625, "calib/auroc": 0.6041666666666667, "calib/avg_num_step_conf": 0.51953125, "calib/ece": 0.545208, "calib/final_conf_rate": 0.09765625, "calib/format_rate": 0.0859375, "calib/frac_conf_gt_0.9": 0.8, "calib/gap": 0.08908472222222219, "calib/mean_conf": 0.9052079999999999, "calib/mu_c": 0.9622222222222222, "calib/mu_w": 0.8731375, "calib/nonempty_final_conf_rate": 0.09765625, "calib/nonempty_reasoning_rate": 0.171875, "calib/nonempty_step_conf_rate": 0.12890625, "calib/pce": 0.545208, "calib/std_conf": 0.1966180010477169, "calib/step_conf_rate": 0.12890625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.06640625, "completions/max_length": 2966.0, "completions/max_terminated_length": 2966.0, "completions/mean_length": 640.15625, "completions/mean_terminated_length": 685.6903686523438, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.014933333333333333, "grad_norm": 1.9388587474822998, "learning_rate": 3.5e-06, "loss": 0.0171, "num_tokens": 3793143.0, "reward": 0.08720956742763519, "reward_std": 0.15004198253154755, "rewards/accuracy_reward_step": 0.03515625, "rewards/final_brier_reward_step": 0.03633826598525047, "rewards/format_reward_step": 0.0859375, "step": 14 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -1.899765049329094e-05, "aux_brier/mean_r": 0.38708134088665247, "aux_brier/n_active_tok": 26.25, "aux_brier/n_step_records": 6.5625, "aux_brier/std_r": 0.1446176189929247, "calib/answer_extract_rate": 0.09375, "calib/auroc": 0.59375, "calib/avg_num_step_conf": 0.41796875, "calib/ece": 0.65, "calib/final_conf_rate": 0.08203125, "calib/format_rate": 0.0625, "calib/frac_conf_gt_0.9": 0.7619047619047619, "calib/gap": 0.09699999999999998, "calib/mean_conf": 0.8880952380952378, "calib/mu_c": 0.962, "calib/mu_w": 0.865, "calib/nonempty_final_conf_rate": 0.08203125, "calib/nonempty_reasoning_rate": 0.1171875, "calib/nonempty_step_conf_rate": 0.09375, "calib/pce": 0.65, "calib/std_conf": 0.20710289707612556, "calib/step_conf_rate": 0.09375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08203125, "completions/max_length": 3059.0, "completions/max_terminated_length": 3059.0, "completions/mean_length": 619.09765625, "completions/mean_terminated_length": 674.4212646484375, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.016, "grad_norm": 4.708161354064941, "learning_rate": 3.7500000000000005e-06, "loss": 0.0097, "num_tokens": 4059512.0, "reward": 0.056370899081230164, "reward_std": 0.10648147016763687, "rewards/accuracy_reward_step": 0.01953125, "rewards/final_brier_reward_step": 0.022358594462275505, "rewards/format_reward_step": 0.0625, "step": 15 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -2.136857553463891e-06, "aux_brier/mean_r": 0.5416447846662431, "aux_brier/n_active_tok": 31.238095238095237, "aux_brier/n_step_records": 7.809523809523809, "aux_brier/std_r": 0.15224610175937414, "calib/answer_extract_rate": 0.12890625, "calib/auroc": 0.25877192982456143, "calib/avg_num_step_conf": 0.65234375, "calib/ece": 0.75236, "calib/final_conf_rate": 0.09765625, "calib/format_rate": 0.0859375, "calib/frac_conf_gt_0.9": 0.68, "calib/gap": -0.20266666666666655, "calib/mean_conf": 0.8923599999999999, "calib/mu_c": 0.7383333333333333, "calib/mu_w": 0.9409999999999998, "calib/nonempty_final_conf_rate": 0.09765625, "calib/nonempty_reasoning_rate": 0.15234375, "calib/nonempty_step_conf_rate": 0.12109375, "calib/pce": 0.7023600000000001, "calib/std_conf": 0.18717283563594372, "calib/step_conf_rate": 0.12109375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0859375, "completions/max_length": 2974.0, "completions/max_terminated_length": 2974.0, "completions/mean_length": 680.01171875, "completions/mean_terminated_length": 743.9444580078125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.017066666666666667, "grad_norm": 0.0395950973033905, "learning_rate": 4.000000000000001e-06, "loss": 0.043, "num_tokens": 4342443.0, "reward": 0.07290761172771454, "reward_std": 0.1665337234735489, "rewards/accuracy_reward_step": 0.0234375, "rewards/final_brier_reward_step": 0.026005465537309647, "rewards/format_reward_step": 0.0859375, "step": 16 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -8.534246501712417e-06, "aux_brier/mean_r": 0.5582275787989298, "aux_brier/n_active_tok": 27.333333333333332, "aux_brier/n_step_records": 6.833333333333333, "aux_brier/std_r": 0.16473146995607144, "calib/answer_extract_rate": 0.17578125, "calib/auroc": 0.5308641975308642, "calib/avg_num_step_conf": 0.734375, "calib/ece": 0.6869696969696968, "calib/final_conf_rate": 0.12890625, "calib/format_rate": 0.109375, "calib/frac_conf_gt_0.9": 0.5757575757575758, "calib/gap": 0.004074074074074119, "calib/mean_conf": 0.8433333333333333, "calib/mu_c": 0.8466666666666667, "calib/mu_w": 0.8425925925925926, "calib/nonempty_final_conf_rate": 0.12890625, "calib/nonempty_reasoning_rate": 0.2109375, "calib/nonempty_step_conf_rate": 0.1484375, "calib/pce": 0.6742424242424241, "calib/std_conf": 0.2232496451070058, "calib/step_conf_rate": 0.1484375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0859375, "completions/max_length": 3058.0, "completions/max_terminated_length": 3058.0, "completions/mean_length": 618.03515625, "completions/mean_terminated_length": 676.1410522460938, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.018133333333333335, "grad_norm": 0.7789238691329956, "learning_rate": 4.25e-06, "loss": 0.0444, "num_tokens": 4604188.0, "reward": 0.08887685090303421, "reward_std": 0.1567658632993698, "rewards/accuracy_reward_step": 0.0234375, "rewards/final_brier_reward_step": 0.04300742223858833, "rewards/format_reward_step": 0.109375, "step": 17 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -4.710843873804531e-06, "aux_brier/mean_r": 0.4821825176477432, "aux_brier/n_active_tok": 20.0, "aux_brier/n_step_records": 5.0, "aux_brier/std_r": 0.16257626454656324, "calib/answer_extract_rate": 0.0625, "calib/auroc": 0.5, "calib/avg_num_step_conf": 0.23828125, "calib/ece": 0.6481818181818182, "calib/final_conf_rate": 0.04296875, "calib/format_rate": 0.0390625, "calib/frac_conf_gt_0.9": 0.7272727272727273, "calib/gap": 0.0033333333333334103, "calib/mean_conf": 0.920909090909091, "calib/mu_c": 0.9233333333333333, "calib/mu_w": 0.9199999999999999, "calib/nonempty_final_conf_rate": 0.04296875, "calib/nonempty_reasoning_rate": 0.08203125, "calib/nonempty_step_conf_rate": 0.05859375, "calib/pce": 0.6481818181818182, "calib/std_conf": 0.07316911239221412, "calib/step_conf_rate": 0.05859375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.12109375, "completions/max_length": 3046.0, "completions/max_terminated_length": 3046.0, "completions/mean_length": 653.99609375, "completions/mean_terminated_length": 744.1022338867188, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0192, "grad_norm": 0.17543430626392365, "learning_rate": 4.5e-06, "loss": 0.032, "num_tokens": 4882331.0, "reward": 0.03487773612141609, "reward_std": 0.08869786560535431, "rewards/accuracy_reward_step": 0.01171875, "rewards/final_brier_reward_step": 0.014510937966406345, "rewards/format_reward_step": 0.0390625, "step": 18 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -1.1366190287377873e-07, "aux_brier/mean_r": 0.4756902147616659, "aux_brier/n_active_tok": 33.714285714285715, "aux_brier/n_step_records": 8.428571428571429, "aux_brier/std_r": 0.18338692931657924, "calib/answer_extract_rate": 0.20703125, "calib/auroc": 0.29885057471264365, "calib/avg_num_step_conf": 0.984375, "calib/ece": 0.6370731707317073, "calib/final_conf_rate": 0.16015625, "calib/format_rate": 0.1328125, "calib/frac_conf_gt_0.9": 0.8536585365853658, "calib/gap": -0.0020114942528735025, "calib/mean_conf": 0.9297560975609755, "calib/mu_c": 0.9283333333333332, "calib/mu_w": 0.9303448275862067, "calib/nonempty_final_conf_rate": 0.16015625, "calib/nonempty_reasoning_rate": 0.25, "calib/nonempty_step_conf_rate": 0.19140625, "calib/pce": 0.6370731707317073, "calib/std_conf": 0.15232327312656618, "calib/step_conf_rate": 0.19140625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05859375, "completions/max_length": 3040.0, "completions/max_terminated_length": 3040.0, "completions/mean_length": 629.8828125, "completions/mean_terminated_length": 669.087158203125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.020266666666666665, "grad_norm": 0.25565633177757263, "learning_rate": 4.75e-06, "loss": 0.0358, "num_tokens": 5148341.0, "reward": 0.1264823079109192, "reward_std": 0.25401872396469116, "rewards/accuracy_reward_step": 0.046875, "rewards/final_brier_reward_step": 0.05280429869890213, "rewards/format_reward_step": 0.1328125, "step": 19 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -6.432423591260429e-06, "aux_brier/mean_r": 0.4974280207835395, "aux_brier/n_active_tok": 36.689655172413794, "aux_brier/n_step_records": 9.172413793103448, "aux_brier/std_r": 0.1820825741722666, "calib/answer_extract_rate": 0.2421875, "calib/auroc": 0.3118466898954704, "calib/avg_num_step_conf": 1.0703125, "calib/ece": 0.789375, "calib/final_conf_rate": 0.1875, "calib/format_rate": 0.13671875, "calib/frac_conf_gt_0.9": 0.7916666666666666, "calib/gap": -0.06888501742160269, "calib/mean_conf": 0.9031249999999998, "calib/mu_c": 0.8442857142857143, "calib/mu_w": 0.913170731707317, "calib/nonempty_final_conf_rate": 0.1875, "calib/nonempty_reasoning_rate": 0.2890625, "calib/nonempty_step_conf_rate": 0.203125, "calib/pce": 0.7733333333333333, "calib/std_conf": 0.16700993296308256, "calib/step_conf_rate": 0.203125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 3034.0, "completions/max_terminated_length": 3034.0, "completions/mean_length": 608.5625, "completions/mean_terminated_length": 623.1680297851562, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.021333333333333333, "grad_norm": 0.5931670069694519, "learning_rate": 5e-06, "loss": 0.0586, "num_tokens": 5409005.0, "reward": 0.10494725406169891, "reward_std": 0.17312300205230713, "rewards/accuracy_reward_step": 0.02734375, "rewards/final_brier_reward_step": 0.036976560950279236, "rewards/format_reward_step": 0.13671875, "step": 20 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -1.3676254636393485e-06, "aux_brier/mean_r": 0.48050905391573906, "aux_brier/n_active_tok": 43.714285714285715, "aux_brier/n_step_records": 10.928571428571429, "aux_brier/std_r": 0.20407255590544082, "calib/answer_extract_rate": 0.2578125, "calib/auroc": 0.5731382978723404, "calib/avg_num_step_conf": 1.21875, "calib/ece": 0.8092727272727276, "calib/final_conf_rate": 0.21484375, "calib/format_rate": 0.1875, "calib/frac_conf_gt_0.9": 0.8, "calib/gap": -0.10486702127659597, "calib/mean_conf": 0.9183636363636365, "calib/mu_c": 0.82875, "calib/mu_w": 0.933617021276596, "calib/nonempty_final_conf_rate": 0.21484375, "calib/nonempty_reasoning_rate": 0.2890625, "calib/nonempty_step_conf_rate": 0.23046875, "calib/pce": 0.7910909090909094, "calib/std_conf": 0.14679315857681746, "calib/step_conf_rate": 0.23046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05078125, "completions/max_length": 2782.0, "completions/max_terminated_length": 2782.0, "completions/mean_length": 631.68359375, "completions/mean_terminated_length": 665.4773559570312, "completions/min_length": 0.0, "completions/min_terminated_length": 16.0, "epoch": 0.0224, "grad_norm": 1.6513150930404663, "learning_rate": 4.9722222222222224e-06, "loss": 0.1303, "num_tokens": 5673676.0, "reward": 0.13685917854309082, "reward_std": 0.23789754509925842, "rewards/accuracy_reward_step": 0.03125, "rewards/final_brier_reward_step": 0.04743672162294388, "rewards/format_reward_step": 0.1875, "step": 21 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 3.3084986293006214e-07, "aux_brier/mean_r": 0.5050474463237656, "aux_brier/n_active_tok": 35.55555555555556, "aux_brier/n_step_records": 8.88888888888889, "aux_brier/std_r": 0.18726607936399955, "calib/answer_extract_rate": 0.234375, "calib/auroc": 0.7017374517374517, "calib/avg_num_step_conf": 0.97265625, "calib/ece": 0.6479411764705882, "calib/final_conf_rate": 0.19921875, "calib/format_rate": 0.15625, "calib/frac_conf_gt_0.9": 0.7254901960784313, "calib/gap": 0.05640926640926669, "calib/mean_conf": 0.9126470588235295, "calib/mu_c": 0.9535714285714286, "calib/mu_w": 0.8971621621621619, "calib/nonempty_final_conf_rate": 0.19921875, "calib/nonempty_reasoning_rate": 0.3046875, "calib/nonempty_step_conf_rate": 0.23046875, "calib/pce": 0.6430392156862744, "calib/std_conf": 0.1539714543863871, "calib/step_conf_rate": 0.23046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.046875, "completions/max_length": 2857.0, "completions/max_terminated_length": 2857.0, "completions/mean_length": 570.92578125, "completions/mean_terminated_length": 599.0040893554688, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.023466666666666667, "grad_norm": 0.9173411130905151, "learning_rate": 4.944444444444445e-06, "loss": 0.0832, "num_tokens": 5921649.0, "reward": 0.14846542477607727, "reward_std": 0.2522590756416321, "rewards/accuracy_reward_step": 0.0546875, "rewards/final_brier_reward_step": 0.06261172145605087, "rewards/format_reward_step": 0.15625, "step": 22 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -5.8481565401035806e-08, "aux_brier/mean_r": 0.496414174636205, "aux_brier/n_active_tok": 48.888888888888886, "aux_brier/n_step_records": 12.222222222222221, "aux_brier/std_r": 0.22430429524845547, "calib/answer_extract_rate": 0.328125, "calib/auroc": 0.34572697003329633, "calib/avg_num_step_conf": 1.3125, "calib/ece": 0.689242857142857, "calib/final_conf_rate": 0.2734375, "calib/format_rate": 0.21484375, "calib/frac_conf_gt_0.9": 0.7142857142857143, "calib/gap": -0.09848945615982241, "calib/mean_conf": 0.8881, "calib/mu_c": 0.8135294117647058, "calib/mu_w": 0.9120188679245282, "calib/nonempty_final_conf_rate": 0.2734375, "calib/nonempty_reasoning_rate": 0.3671875, "calib/nonempty_step_conf_rate": 0.2734375, "calib/pce": 0.667242857142857, "calib/std_conf": 0.18237779861751657, "calib/step_conf_rate": 0.2734375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05859375, "completions/max_length": 2994.0, "completions/max_terminated_length": 2994.0, "completions/mean_length": 556.08203125, "completions/mean_terminated_length": 590.6929931640625, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.024533333333333334, "grad_norm": 0.38058707118034363, "learning_rate": 4.9166666666666665e-06, "loss": 0.0773, "num_tokens": 6167942.0, "reward": 0.19895045459270477, "reward_std": 0.31784963607788086, "rewards/accuracy_reward_step": 0.07421875, "rewards/final_brier_reward_step": 0.06923934072256088, "rewards/format_reward_step": 0.21484375, "step": 23 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 4.6639704084468797e-08, "aux_brier/mean_r": 0.527713478572907, "aux_brier/n_active_tok": 65.80645161290323, "aux_brier/n_step_records": 16.451612903225808, "aux_brier/std_r": 0.22502769331537908, "calib/answer_extract_rate": 0.390625, "calib/auroc": 0.5677536231884058, "calib/avg_num_step_conf": 2.0546875, "calib/ece": 0.6527325842696629, "calib/final_conf_rate": 0.34765625, "calib/format_rate": 0.29296875, "calib/frac_conf_gt_0.9": 0.6966292134831461, "calib/gap": 0.016118840579710136, "calib/mean_conf": 0.858503370786517, "calib/mu_c": 0.8709999999999999, "calib/mu_w": 0.8548811594202897, "calib/nonempty_final_conf_rate": 0.34765625, "calib/nonempty_reasoning_rate": 0.44921875, "calib/nonempty_step_conf_rate": 0.38671875, "calib/pce": 0.6432584269662921, "calib/std_conf": 0.22701041130875557, "calib/step_conf_rate": 0.38671875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05078125, "completions/max_length": 3044.0, "completions/max_terminated_length": 3044.0, "completions/mean_length": 546.4609375, "completions/mean_terminated_length": 575.6954345703125, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.0256, "grad_norm": 0.5940788984298706, "learning_rate": 4.888888888888889e-06, "loss": 0.0825, "num_tokens": 6412348.0, "reward": 0.2518075108528137, "reward_std": 0.35597795248031616, "rewards/accuracy_reward_step": 0.078125, "rewards/final_brier_reward_step": 0.1087925136089325, "rewards/format_reward_step": 0.29296875, "step": 24 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -3.004320237748505e-08, "aux_brier/mean_r": 0.5155417505771883, "aux_brier/n_active_tok": 59.096774193548384, "aux_brier/n_step_records": 14.774193548387096, "aux_brier/std_r": 0.24959693704881974, "calib/answer_extract_rate": 0.39453125, "calib/auroc": 0.553921568627451, "calib/avg_num_step_conf": 1.87890625, "calib/ece": 0.6370760869565217, "calib/final_conf_rate": 0.359375, "calib/format_rate": 0.3046875, "calib/frac_conf_gt_0.9": 0.6739130434782609, "calib/gap": 0.031235294117646917, "calib/mean_conf": 0.8731630434782608, "calib/mu_c": 0.8962499999999999, "calib/mu_w": 0.865014705882353, "calib/nonempty_final_conf_rate": 0.359375, "calib/nonempty_reasoning_rate": 0.484375, "calib/nonempty_step_conf_rate": 0.41796875, "calib/pce": 0.6246847826086956, "calib/std_conf": 0.19095243416221844, "calib/step_conf_rate": 0.41796875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 2942.0, "completions/max_terminated_length": 2942.0, "completions/mean_length": 510.1796875, "completions/mean_terminated_length": 544.1917114257812, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.02666666666666667, "grad_norm": 3.0921874046325684, "learning_rate": 4.861111111111111e-06, "loss": 0.0844, "num_tokens": 6646178.0, "reward": 0.2842850089073181, "reward_std": 0.41399136185646057, "rewards/accuracy_reward_step": 0.1015625, "rewards/final_brier_reward_step": 0.12151507288217545, "rewards/format_reward_step": 0.3046875, "step": 25 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 1.662669238400305e-08, "aux_brier/mean_r": 0.5430627530440688, "aux_brier/n_active_tok": 75.375, "aux_brier/n_step_records": 18.84375, "aux_brier/std_r": 0.23851684853434563, "calib/answer_extract_rate": 0.51171875, "calib/auroc": 0.4463917525773196, "calib/avg_num_step_conf": 2.44140625, "calib/ece": 0.7158839285714286, "calib/final_conf_rate": 0.4375, "calib/format_rate": 0.37109375, "calib/frac_conf_gt_0.9": 0.6696428571428571, "calib/gap": 0.0017560137457043323, "calib/mean_conf": 0.8498125, "calib/mu_c": 0.8513333333333332, "calib/mu_w": 0.8495773195876288, "calib/nonempty_final_conf_rate": 0.4375, "calib/nonempty_reasoning_rate": 0.5703125, "calib/nonempty_step_conf_rate": 0.46484375, "calib/pce": 0.7158839285714286, "calib/std_conf": 0.23122030818070147, "calib/step_conf_rate": 0.46484375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 2714.0, "completions/max_terminated_length": 2714.0, "completions/mean_length": 413.05859375, "completions/mean_terminated_length": 426.383056640625, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.027733333333333332, "grad_norm": 1.2386794090270996, "learning_rate": 4.833333333333333e-06, "loss": 0.1663, "num_tokens": 6857161.0, "reward": 0.28813180327415466, "reward_std": 0.4005846381187439, "rewards/accuracy_reward_step": 0.0703125, "rewards/final_brier_reward_step": 0.12908968329429626, "rewards/format_reward_step": 0.37109375, "step": 26 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 1.4259333852573164e-09, "aux_brier/mean_r": 0.5291969170793891, "aux_brier/n_active_tok": 79.625, "aux_brier/n_step_records": 19.90625, "aux_brier/std_r": 0.24978127051144838, "calib/answer_extract_rate": 0.58203125, "calib/auroc": 0.46126126126126127, "calib/avg_num_step_conf": 2.57421875, "calib/ece": 0.7419755725190837, "calib/final_conf_rate": 0.51171875, "calib/format_rate": 0.421875, "calib/frac_conf_gt_0.9": 0.7022900763358778, "calib/gap": -0.015705405405405326, "calib/mean_conf": 0.8838076335877861, "calib/mu_c": 0.8705, "calib/mu_w": 0.8862054054054054, "calib/nonempty_final_conf_rate": 0.51171875, "calib/nonempty_reasoning_rate": 0.640625, "calib/nonempty_step_conf_rate": 0.50390625, "calib/pce": 0.7365557251908396, "calib/std_conf": 0.17750962112117555, "calib/step_conf_rate": 0.50390625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0390625, "completions/max_length": 2700.0, "completions/max_terminated_length": 2700.0, "completions/mean_length": 389.20703125, "completions/mean_terminated_length": 405.0284423828125, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.0288, "grad_norm": 1.5228068828582764, "learning_rate": 4.805555555555556e-06, "loss": 0.0851, "num_tokens": 7062014.0, "reward": 0.3301459550857544, "reward_std": 0.43127691745758057, "rewards/accuracy_reward_step": 0.0859375, "rewards/final_brier_reward_step": 0.1330837607383728, "rewards/format_reward_step": 0.421875, "step": 27 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -1.2769116139454972e-08, "aux_brier/mean_r": 0.49373787781223655, "aux_brier/n_active_tok": 70.25, "aux_brier/n_step_records": 17.5625, "aux_brier/std_r": 0.24095073924399912, "calib/answer_extract_rate": 0.52734375, "calib/auroc": 0.4760526315789474, "calib/avg_num_step_conf": 2.23828125, "calib/ece": 0.7305630252100841, "calib/final_conf_rate": 0.46484375, "calib/format_rate": 0.38671875, "calib/frac_conf_gt_0.9": 0.6722689075630253, "calib/gap": 0.027914210526315597, "calib/mean_conf": 0.8902268907563026, "calib/mu_c": 0.9136842105263155, "calib/mu_w": 0.88577, "calib/nonempty_final_conf_rate": 0.46484375, "calib/nonempty_reasoning_rate": 0.60546875, "calib/nonempty_step_conf_rate": 0.5, "calib/pce": 0.7305630252100841, "calib/std_conf": 0.15394047943819175, "calib/step_conf_rate": 0.5, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0390625, "completions/max_length": 2941.0, "completions/max_terminated_length": 2941.0, "completions/mean_length": 460.7109375, "completions/mean_terminated_length": 479.4389953613281, "completions/min_length": 0.0, "completions/min_terminated_length": 6.0, "epoch": 0.029866666666666666, "grad_norm": 1.854733943939209, "learning_rate": 4.777777777777778e-06, "loss": 0.1565, "num_tokens": 7286900.0, "reward": 0.3019137978553772, "reward_std": 0.392570436000824, "rewards/accuracy_reward_step": 0.078125, "rewards/final_brier_reward_step": 0.12171761691570282, "rewards/format_reward_step": 0.38671875, "step": 28 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 2.6980641901208635e-08, "aux_brier/mean_r": 0.5290570193901658, "aux_brier/n_active_tok": 96.5, "aux_brier/n_step_records": 24.125, "aux_brier/std_r": 0.24694413202814758, "calib/answer_extract_rate": 0.6015625, "calib/auroc": 0.5291994750656168, "calib/avg_num_step_conf": 3.46484375, "calib/ece": 0.7005708609271524, "calib/final_conf_rate": 0.58984375, "calib/format_rate": 0.4609375, "calib/frac_conf_gt_0.9": 0.6291390728476821, "calib/gap": 0.07287769028871394, "calib/mean_conf": 0.8503721854304637, "calib/mu_c": 0.9116666666666667, "calib/mu_w": 0.8387889763779528, "calib/nonempty_final_conf_rate": 0.58984375, "calib/nonempty_reasoning_rate": 0.72265625, "calib/nonempty_step_conf_rate": 0.62890625, "calib/pce": 0.6960013245033113, "calib/std_conf": 0.22142825252083165, "calib/step_conf_rate": 0.62890625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 2631.0, "completions/max_terminated_length": 2631.0, "completions/mean_length": 438.91015625, "completions/mean_terminated_length": 453.06854248046875, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.030933333333333334, "grad_norm": 1.6967421770095825, "learning_rate": 4.75e-06, "loss": 0.1247, "num_tokens": 7506389.0, "reward": 0.3713197112083435, "reward_std": 0.4068503975868225, "rewards/accuracy_reward_step": 0.09765625, "rewards/final_brier_reward_step": 0.1727789044380188, "rewards/format_reward_step": 0.4609375, "step": 29 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 1.1612126801785472e-08, "aux_brier/mean_r": 0.5004166094586253, "aux_brier/n_active_tok": 98.625, "aux_brier/n_step_records": 24.65625, "aux_brier/std_r": 0.24637897731736302, "calib/answer_extract_rate": 0.66015625, "calib/auroc": 0.6017948717948718, "calib/avg_num_step_conf": 3.140625, "calib/ece": 0.6691875, "calib/final_conf_rate": 0.625, "calib/format_rate": 0.53125, "calib/frac_conf_gt_0.9": 0.63125, "calib/gap": 0.07464102564102604, "calib/mean_conf": 0.8566874999999999, "calib/mu_c": 0.9173333333333336, "calib/mu_w": 0.8426923076923075, "calib/nonempty_final_conf_rate": 0.625, "calib/nonempty_reasoning_rate": 0.734375, "calib/nonempty_step_conf_rate": 0.6484375, "calib/pce": 0.6691875, "calib/std_conf": 0.21933057776732817, "calib/step_conf_rate": 0.6484375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05078125, "completions/max_length": 2951.0, "completions/max_terminated_length": 2951.0, "completions/mean_length": 422.63671875, "completions/mean_terminated_length": 445.24688720703125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.032, "grad_norm": 1.4614795446395874, "learning_rate": 4.722222222222222e-06, "loss": 0.0641, "num_tokens": 7721568.0, "reward": 0.43827489018440247, "reward_std": 0.4441397190093994, "rewards/accuracy_reward_step": 0.12109375, "rewards/final_brier_reward_step": 0.20622462034225464, "rewards/format_reward_step": 0.53125, "step": 30 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -4.430193184523645e-09, "aux_brier/mean_r": 0.5572385936975479, "aux_brier/n_active_tok": 113.875, "aux_brier/n_step_records": 28.46875, "aux_brier/std_r": 0.258733581751585, "calib/answer_extract_rate": 0.71875, "calib/auroc": 0.5455629139072847, "calib/avg_num_step_conf": 3.68359375, "calib/ece": 0.661715340909091, "calib/final_conf_rate": 0.6875, "calib/format_rate": 0.59765625, "calib/frac_conf_gt_0.9": 0.5284090909090909, "calib/gap": 0.08280066225165572, "calib/mean_conf": 0.8037607954545455, "calib/mu_c": 0.8748, "calib/mu_w": 0.7919993377483443, "calib/nonempty_final_conf_rate": 0.6875, "calib/nonempty_reasoning_rate": 0.8046875, "calib/nonempty_step_conf_rate": 0.6953125, "calib/pce": 0.661715340909091, "calib/std_conf": 0.2652597245613575, "calib/step_conf_rate": 0.6953125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.04296875, "completions/max_length": 2794.0, "completions/max_terminated_length": 2794.0, "completions/mean_length": 367.8828125, "completions/mean_terminated_length": 384.3999938964844, "completions/min_length": 0.0, "completions/min_terminated_length": 6.0, "epoch": 0.03306666666666667, "grad_norm": 1.8279743194580078, "learning_rate": 4.694444444444445e-06, "loss": 0.0837, "num_tokens": 7921658.0, "reward": 0.45326340198516846, "reward_std": 0.376362681388855, "rewards/accuracy_reward_step": 0.09765625, "rewards/final_brier_reward_step": 0.2271161526441574, "rewards/format_reward_step": 0.59765625, "step": 31 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 2.597233753065531e-08, "aux_brier/mean_r": 0.5949481436982751, "aux_brier/n_active_tok": 108.625, "aux_brier/n_step_records": 27.15625, "aux_brier/std_r": 0.25198414269834757, "calib/answer_extract_rate": 0.75390625, "calib/auroc": 0.49517374517374513, "calib/avg_num_step_conf": 3.4375, "calib/ece": 0.6471584699453552, "calib/final_conf_rate": 0.71484375, "calib/format_rate": 0.6328125, "calib/frac_conf_gt_0.9": 0.546448087431694, "calib/gap": 0.011173745173745142, "calib/mean_conf": 0.822677595628415, "calib/mu_c": 0.8317142857142856, "calib/mu_w": 0.8205405405405405, "calib/nonempty_final_conf_rate": 0.71484375, "calib/nonempty_reasoning_rate": 0.82421875, "calib/nonempty_step_conf_rate": 0.75, "calib/pce": 0.6392896174863388, "calib/std_conf": 0.23999530935924054, "calib/step_conf_rate": 0.75, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2970.0, "completions/max_terminated_length": 2970.0, "completions/mean_length": 367.51953125, "completions/mean_terminated_length": 371.8774719238281, "completions/min_length": 0.0, "completions/min_terminated_length": 6.0, "epoch": 0.034133333333333335, "grad_norm": 0.9370484948158264, "learning_rate": 4.666666666666667e-06, "loss": 0.0933, "num_tokens": 8122447.0, "reward": 0.5239681005477905, "reward_std": 0.44192588329315186, "rewards/accuracy_reward_step": 0.14453125, "rewards/final_brier_reward_step": 0.25212225317955017, "rewards/format_reward_step": 0.6328125, "step": 32 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -2.078682537209886e-08, "aux_brier/mean_r": 0.5465734777972102, "aux_brier/n_active_tok": 120.75, "aux_brier/n_step_records": 30.1875, "aux_brier/std_r": 0.250720904674381, "calib/answer_extract_rate": 0.7890625, "calib/auroc": 0.5107784431137725, "calib/avg_num_step_conf": 3.87890625, "calib/ece": 0.6996868020304567, "calib/final_conf_rate": 0.76953125, "calib/format_rate": 0.6640625, "calib/frac_conf_gt_0.9": 0.5939086294416244, "calib/gap": 0.02776015968063883, "calib/mean_conf": 0.8421406091370559, "calib/mu_c": 0.8656733333333334, "calib/mu_w": 0.8379131736526946, "calib/nonempty_final_conf_rate": 0.76953125, "calib/nonempty_reasoning_rate": 0.86328125, "calib/nonempty_step_conf_rate": 0.7578125, "calib/pce": 0.6947715736040608, "calib/std_conf": 0.22256769454587635, "calib/step_conf_rate": 0.7578125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2962.0, "completions/max_terminated_length": 2962.0, "completions/mean_length": 349.01171875, "completions/mean_terminated_length": 351.75982666015625, "completions/min_length": 0.0, "completions/min_terminated_length": 6.0, "epoch": 0.0352, "grad_norm": 1.4318177700042725, "learning_rate": 4.638888888888889e-06, "loss": 0.0916, "num_tokens": 8318666.0, "reward": 0.5124556422233582, "reward_std": 0.43789881467819214, "rewards/accuracy_reward_step": 0.12109375, "rewards/final_brier_reward_step": 0.2373226284980774, "rewards/format_reward_step": 0.6640625, "step": 33 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 1.8216000555182355e-08, "aux_brier/mean_r": 0.5587637647986412, "aux_brier/n_active_tok": 129.0, "aux_brier/n_step_records": 32.25, "aux_brier/std_r": 0.2557013728655875, "calib/answer_extract_rate": 0.87109375, "calib/auroc": 0.5434593023255814, "calib/avg_num_step_conf": 4.1484375, "calib/ece": 0.640282075471698, "calib/final_conf_rate": 0.828125, "calib/format_rate": 0.73828125, "calib/frac_conf_gt_0.9": 0.5518867924528302, "calib/gap": 0.039743023255813914, "calib/mean_conf": 0.8207556603773585, "calib/mu_c": 0.853, "calib/mu_w": 0.8132569767441861, "calib/nonempty_final_conf_rate": 0.828125, "calib/nonempty_reasoning_rate": 0.9375, "calib/nonempty_step_conf_rate": 0.83984375, "calib/pce": 0.6361792452830188, "calib/std_conf": 0.23506477412406535, "calib/step_conf_rate": 0.83984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2392.0, "completions/max_terminated_length": 2392.0, "completions/mean_length": 289.01953125, "completions/mean_terminated_length": 290.1529541015625, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.03626666666666667, "grad_norm": 0.595558762550354, "learning_rate": 4.611111111111112e-06, "loss": 0.0463, "num_tokens": 8497767.0, "reward": 0.6062667369842529, "reward_std": 0.4581567347049713, "rewards/accuracy_reward_step": 0.16015625, "rewards/final_brier_reward_step": 0.3078796863555908, "rewards/format_reward_step": 0.73828125, "step": 34 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 9.28674487310488e-09, "aux_brier/mean_r": 0.5592245208099484, "aux_brier/n_active_tok": 145.25, "aux_brier/n_step_records": 36.3125, "aux_brier/std_r": 0.24941385397687554, "calib/answer_extract_rate": 0.8359375, "calib/auroc": 0.47038060159607126, "calib/avg_num_step_conf": 4.59765625, "calib/ece": 0.6524806451612903, "calib/final_conf_rate": 0.84765625, "calib/format_rate": 0.734375, "calib/frac_conf_gt_0.9": 0.4976958525345622, "calib/gap": -0.013567955801105036, "calib/mean_conf": 0.8038170506912443, "calib/mu_c": 0.7925, "calib/mu_w": 0.806067955801105, "calib/nonempty_final_conf_rate": 0.84765625, "calib/nonempty_reasoning_rate": 0.91796875, "calib/nonempty_step_conf_rate": 0.8359375, "calib/pce": 0.645199539170507, "calib/std_conf": 0.24386630796583364, "calib/step_conf_rate": 0.8359375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2324.0, "completions/max_terminated_length": 2324.0, "completions/mean_length": 322.125, "completions/mean_terminated_length": 328.5418395996094, "completions/min_length": 0.0, "completions/min_terminated_length": 22.0, "epoch": 0.037333333333333336, "grad_norm": 0.5558879375457764, "learning_rate": 4.583333333333333e-06, "loss": 0.0753, "num_tokens": 8689487.0, "reward": 0.5828825235366821, "reward_std": 0.4019334316253662, "rewards/accuracy_reward_step": 0.14453125, "rewards/final_brier_reward_step": 0.2846549451351166, "rewards/format_reward_step": 0.734375, "step": 35 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -1.5828130806028184e-08, "aux_brier/mean_r": 0.6171585414558649, "aux_brier/n_active_tok": 144.375, "aux_brier/n_step_records": 36.09375, "aux_brier/std_r": 0.2658169395290315, "calib/answer_extract_rate": 0.921875, "calib/auroc": 0.4841391268869849, "calib/avg_num_step_conf": 4.578125, "calib/ece": 0.5913537117903931, "calib/final_conf_rate": 0.89453125, "calib/format_rate": 0.84375, "calib/frac_conf_gt_0.9": 0.5414847161572053, "calib/gap": -0.010875152998775728, "calib/mean_conf": 0.8176419213973799, "calib/mu_c": 0.8094736842105263, "calib/mu_w": 0.8203488372093021, "calib/nonempty_final_conf_rate": 0.89453125, "calib/nonempty_reasoning_rate": 0.9765625, "calib/nonempty_step_conf_rate": 0.94140625, "calib/pce": 0.5800436681222708, "calib/std_conf": 0.23306507738182952, "calib/step_conf_rate": 0.94140625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2466.0, "completions/max_terminated_length": 2466.0, "completions/mean_length": 293.390625, "completions/mean_terminated_length": 294.54119873046875, "completions/min_length": 0.0, "completions/min_terminated_length": 29.0, "epoch": 0.0384, "grad_norm": 5.582181930541992, "learning_rate": 4.555555555555556e-06, "loss": 0.0643, "num_tokens": 8867307.0, "reward": 0.7395055294036865, "reward_std": 0.48076409101486206, "rewards/accuracy_reward_step": 0.2265625, "rewards/final_brier_reward_step": 0.36427226662635803, "rewards/format_reward_step": 0.84375, "step": 36 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 1.1792812358579763e-08, "aux_brier/mean_r": 0.5962470676749945, "aux_brier/n_active_tok": 137.5, "aux_brier/n_step_records": 34.375, "aux_brier/std_r": 0.2520415005274117, "calib/answer_extract_rate": 0.8984375, "calib/auroc": 0.45482804232804236, "calib/avg_num_step_conf": 4.33984375, "calib/ece": 0.6319737969432314, "calib/final_conf_rate": 0.89453125, "calib/format_rate": 0.80859375, "calib/frac_conf_gt_0.9": 0.4497816593886463, "calib/gap": -0.02289020899470895, "calib/mean_conf": 0.7956419192139738, "calib/mu_c": 0.77675, "calib/mu_w": 0.799640208994709, "calib/nonempty_final_conf_rate": 0.89453125, "calib/nonempty_reasoning_rate": 0.9609375, "calib/nonempty_step_conf_rate": 0.8984375, "calib/pce": 0.6264716135371179, "calib/std_conf": 0.23069053031651537, "calib/step_conf_rate": 0.8984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2423.0, "completions/max_terminated_length": 2423.0, "completions/mean_length": 291.265625, "completions/mean_terminated_length": 291.265625, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "epoch": 0.039466666666666664, "grad_norm": 2.787907838821411, "learning_rate": 4.527777777777778e-06, "loss": 0.077, "num_tokens": 9048967.0, "reward": 0.6483741998672485, "reward_std": 0.3858487010002136, "rewards/accuracy_reward_step": 0.16015625, "rewards/final_brier_reward_step": 0.3356843590736389, "rewards/format_reward_step": 0.80859375, "step": 37 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -2.652600901431601e-08, "aux_brier/mean_r": 0.6513330563902855, "aux_brier/n_active_tok": 147.875, "aux_brier/n_step_records": 36.96875, "aux_brier/std_r": 0.24226353410631418, "calib/answer_extract_rate": 0.9296875, "calib/auroc": 0.3917378917378918, "calib/avg_num_step_conf": 4.66796875, "calib/ece": 0.6340745614035087, "calib/final_conf_rate": 0.890625, "calib/format_rate": 0.8515625, "calib/frac_conf_gt_0.9": 0.40789473684210525, "calib/gap": -0.06964468864468865, "calib/mean_conf": 0.7800394736842104, "calib/mu_c": 0.7223076923076922, "calib/mu_w": 0.7919523809523809, "calib/nonempty_final_conf_rate": 0.890625, "calib/nonempty_reasoning_rate": 0.97265625, "calib/nonempty_step_conf_rate": 0.9296875, "calib/pce": 0.6215307017543859, "calib/std_conf": 0.2469780571681015, "calib/step_conf_rate": 0.9296875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2041.0, "completions/max_terminated_length": 2041.0, "completions/mean_length": 294.5546875, "completions/mean_terminated_length": 294.5546875, "completions/min_length": 53.0, "completions/min_terminated_length": 53.0, "epoch": 0.04053333333333333, "grad_norm": 1.2635794878005981, "learning_rate": 4.5e-06, "loss": 0.0458, "num_tokens": 9231261.0, "reward": 0.6731448173522949, "reward_std": 0.3658376932144165, "rewards/accuracy_reward_step": 0.16015625, "rewards/final_brier_reward_step": 0.3488292098045349, "rewards/format_reward_step": 0.8515625, "step": 38 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -5.716481329964829e-08, "aux_brier/mean_r": 0.6589524978771806, "aux_brier/n_active_tok": 146.875, "aux_brier/n_step_records": 36.71875, "aux_brier/std_r": 0.24464661162346601, "calib/answer_extract_rate": 0.91796875, "calib/auroc": 0.5567956349206349, "calib/avg_num_step_conf": 4.625, "calib/ece": 0.5829487179487179, "calib/final_conf_rate": 0.9140625, "calib/format_rate": 0.859375, "calib/frac_conf_gt_0.9": 0.3547008547008547, "calib/gap": 0.04424107142857148, "calib/mean_conf": 0.7534615384615385, "calib/mu_c": 0.7897619047619048, "calib/mu_w": 0.7455208333333333, "calib/nonempty_final_conf_rate": 0.9140625, "calib/nonempty_reasoning_rate": 0.95703125, "calib/nonempty_step_conf_rate": 0.91796875, "calib/pce": 0.5784615384615385, "calib/std_conf": 0.2643755435131804, "calib/step_conf_rate": 0.91796875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2575.0, "completions/max_terminated_length": 2575.0, "completions/mean_length": 311.62890625, "completions/mean_terminated_length": 312.85101318359375, "completions/min_length": 0.0, "completions/min_terminated_length": 8.0, "epoch": 0.0416, "grad_norm": 1.0386427640914917, "learning_rate": 4.472222222222223e-06, "loss": 0.0012, "num_tokens": 9417126.0, "reward": 0.6948623657226562, "reward_std": 0.40542668104171753, "rewards/accuracy_reward_step": 0.1640625, "rewards/final_brier_reward_step": 0.40444961190223694, "rewards/format_reward_step": 0.859375, "step": 39 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -2.6566617586376573e-08, "aux_brier/mean_r": 0.6813591234385967, "aux_brier/n_active_tok": 162.625, "aux_brier/n_step_records": 40.65625, "aux_brier/std_r": 0.24427634431049228, "calib/answer_extract_rate": 0.9296875, "calib/auroc": 0.4678120331386663, "calib/avg_num_step_conf": 5.15234375, "calib/ece": 0.5964406779661018, "calib/final_conf_rate": 0.921875, "calib/format_rate": 0.875, "calib/frac_conf_gt_0.9": 0.3389830508474576, "calib/gap": -0.030463126443025956, "calib/mean_conf": 0.7383898305084745, "calib/mu_c": 0.7127027027027026, "calib/mu_w": 0.7431658291457286, "calib/nonempty_final_conf_rate": 0.921875, "calib/nonempty_reasoning_rate": 0.98046875, "calib/nonempty_step_conf_rate": 0.95703125, "calib/pce": 0.5890254237288137, "calib/std_conf": 0.2582305182865641, "calib/step_conf_rate": 0.95703125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1550.0, "completions/max_terminated_length": 1550.0, "completions/mean_length": 292.95703125, "completions/mean_terminated_length": 294.10589599609375, "completions/min_length": 0.0, "completions/min_terminated_length": 11.0, "epoch": 0.042666666666666665, "grad_norm": 0.751466691493988, "learning_rate": 4.444444444444444e-06, "loss": 0.0118, "num_tokens": 9598883.0, "reward": 0.6895018219947815, "reward_std": 0.372747540473938, "rewards/accuracy_reward_step": 0.15234375, "rewards/final_brier_reward_step": 0.3986324369907379, "rewards/format_reward_step": 0.875, "step": 40 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -3.134182723074641e-08, "aux_brier/mean_r": 0.6953337397426367, "aux_brier/n_active_tok": 138.25, "aux_brier/n_step_records": 34.5625, "aux_brier/std_r": 0.23881069850176573, "calib/answer_extract_rate": 0.921875, "calib/auroc": 0.5351329243353784, "calib/avg_num_step_conf": 4.359375, "calib/ece": 0.44642857142857145, "calib/final_conf_rate": 0.9296875, "calib/format_rate": 0.875, "calib/frac_conf_gt_0.9": 0.3487394957983193, "calib/gap": 0.032597955010225, "calib/mean_conf": 0.7179411764705883, "calib/mu_c": 0.7402666666666667, "calib/mu_w": 0.7076687116564417, "calib/nonempty_final_conf_rate": 0.9296875, "calib/nonempty_reasoning_rate": 0.96484375, "calib/nonempty_step_conf_rate": 0.9375, "calib/pce": 0.4246218487394958, "calib/std_conf": 0.28714394229831125, "calib/step_conf_rate": 0.9375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2954.0, "completions/max_terminated_length": 2954.0, "completions/mean_length": 280.58984375, "completions/mean_terminated_length": 280.58984375, "completions/min_length": 2.0, "completions/min_terminated_length": 2.0, "epoch": 0.04373333333333333, "grad_norm": 0.9015055894851685, "learning_rate": 4.416666666666667e-06, "loss": 0.0585, "num_tokens": 9777962.0, "reward": 0.8516530990600586, "reward_std": 0.4442591071128845, "rewards/accuracy_reward_step": 0.29296875, "rewards/final_brier_reward_step": 0.4847375154495239, "rewards/format_reward_step": 0.875, "step": 41 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -3.573970536185911e-08, "aux_brier/mean_r": 0.7264319900423288, "aux_brier/n_active_tok": 151.125, "aux_brier/n_step_records": 37.78125, "aux_brier/std_r": 0.22207428002730012, "calib/answer_extract_rate": 0.94921875, "calib/auroc": 0.5133831310301898, "calib/avg_num_step_conf": 4.78125, "calib/ece": 0.4974041666666667, "calib/final_conf_rate": 0.9375, "calib/format_rate": 0.90625, "calib/frac_conf_gt_0.9": 0.20833333333333334, "calib/gap": 0.006626517273575994, "calib/mean_conf": 0.6661541666666666, "calib/mu_c": 0.6713725490196077, "calib/mu_w": 0.6647460317460318, "calib/nonempty_final_conf_rate": 0.9375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.96875, "calib/pce": 0.4755291666666667, "calib/std_conf": 0.2960001358096066, "calib/step_conf_rate": 0.96875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2182.0, "completions/max_terminated_length": 2182.0, "completions/mean_length": 264.22265625, "completions/mean_terminated_length": 265.25885009765625, "completions/min_length": 0.0, "completions/min_terminated_length": 14.0, "epoch": 0.0448, "grad_norm": 15.537371635437012, "learning_rate": 4.388888888888889e-06, "loss": 0.061, "num_tokens": 9949971.0, "reward": 0.7816723585128784, "reward_std": 0.400884211063385, "rewards/accuracy_reward_step": 0.20703125, "rewards/final_brier_reward_step": 0.48606449365615845, "rewards/format_reward_step": 0.90625, "step": 42 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 2.4806744103322842e-08, "aux_brier/mean_r": 0.7718657031655312, "aux_brier/n_active_tok": 158.875, "aux_brier/n_step_records": 39.71875, "aux_brier/std_r": 0.21990600717253983, "calib/answer_extract_rate": 0.94921875, "calib/auroc": 0.505703005044966, "calib/avg_num_step_conf": 5.03515625, "calib/ece": 0.4753062240663901, "calib/final_conf_rate": 0.94140625, "calib/format_rate": 0.8984375, "calib/frac_conf_gt_0.9": 0.24481327800829875, "calib/gap": 0.01189225707391961, "calib/mean_conf": 0.6368099585062241, "calib/mu_c": 0.6463829787234042, "calib/mu_w": 0.6344907216494846, "calib/nonempty_final_conf_rate": 0.94140625, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.9609375, "calib/pce": 0.45854771784232373, "calib/std_conf": 0.3042327737896036, "calib/step_conf_rate": 0.9609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2069.0, "completions/max_terminated_length": 2069.0, "completions/mean_length": 293.890625, "completions/mean_terminated_length": 293.890625, "completions/min_length": 29.0, "completions/min_terminated_length": 29.0, "epoch": 0.04586666666666667, "grad_norm": 0.2196078896522522, "learning_rate": 4.361111111111112e-06, "loss": 0.0963, "num_tokens": 10130431.0, "reward": 0.766109824180603, "reward_std": 0.37400954961776733, "rewards/accuracy_reward_step": 0.19140625, "rewards/final_brier_reward_step": 0.5019391775131226, "rewards/format_reward_step": 0.8984375, "step": 43 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 5.8479750347784076e-08, "aux_brier/mean_r": 0.7793529871851206, "aux_brier/n_active_tok": 165.625, "aux_brier/n_step_records": 41.40625, "aux_brier/std_r": 0.22383457352407277, "calib/answer_extract_rate": 0.9453125, "calib/auroc": 0.4965351186281418, "calib/avg_num_step_conf": 5.23046875, "calib/ece": 0.49806721991701247, "calib/final_conf_rate": 0.94140625, "calib/format_rate": 0.91015625, "calib/frac_conf_gt_0.9": 0.24066390041493776, "calib/gap": -0.02298221752407803, "calib/mean_conf": 0.6374863070539418, "calib/mu_c": 0.6186046511627906, "calib/mu_w": 0.6415868686868686, "calib/nonempty_final_conf_rate": 0.94140625, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.96875, "calib/pce": 0.47856514522821575, "calib/std_conf": 0.2947707601099924, "calib/step_conf_rate": 0.96875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1941.0, "completions/max_terminated_length": 1941.0, "completions/mean_length": 299.54296875, "completions/mean_terminated_length": 299.54296875, "completions/min_length": 26.0, "completions/min_terminated_length": 26.0, "epoch": 0.046933333333333334, "grad_norm": 3.5230212211608887, "learning_rate": 4.333333333333334e-06, "loss": 0.0441, "num_tokens": 10313434.0, "reward": 0.7482264041900635, "reward_std": 0.35527920722961426, "rewards/accuracy_reward_step": 0.16796875, "rewards/final_brier_reward_step": 0.5007181763648987, "rewards/format_reward_step": 0.91015625, "step": 44 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 6.75541330791285e-08, "aux_brier/mean_r": 0.787274956703186, "aux_brier/n_active_tok": 164.25, "aux_brier/n_step_records": 41.0625, "aux_brier/std_r": 0.2170534166507423, "calib/answer_extract_rate": 0.95703125, "calib/auroc": 0.5048076923076923, "calib/avg_num_step_conf": 5.171875, "calib/ece": 0.42102748971193404, "calib/final_conf_rate": 0.94921875, "calib/format_rate": 0.9296875, "calib/frac_conf_gt_0.9": 0.1646090534979424, "calib/gap": 0.010267589743589767, "calib/mean_conf": 0.5825939094650207, "calib/mu_c": 0.5908333333333333, "calib/mu_w": 0.5805657435897436, "calib/nonempty_final_conf_rate": 0.94921875, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.4030452674897119, "calib/std_conf": 0.2938708620767295, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2234.0, "completions/max_terminated_length": 2234.0, "completions/mean_length": 287.80859375, "completions/mean_terminated_length": 288.9372863769531, "completions/min_length": 0.0, "completions/min_terminated_length": 13.0, "epoch": 0.048, "grad_norm": 22.389236450195312, "learning_rate": 4.305555555555556e-06, "loss": 0.0601, "num_tokens": 10492161.0, "reward": 0.7930862903594971, "reward_std": 0.3659040331840515, "rewards/accuracy_reward_step": 0.1875, "rewards/final_brier_reward_step": 0.5629702806472778, "rewards/format_reward_step": 0.9296875, "step": 45 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -2.4132480763716657e-08, "aux_brier/mean_r": 0.8361055795103312, "aux_brier/n_active_tok": 174.75, "aux_brier/n_step_records": 43.6875, "aux_brier/std_r": 0.17583590000867844, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.49730773138270856, "calib/avg_num_step_conf": 5.6015625, "calib/ece": 0.39668032786885243, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.91796875, "calib/frac_conf_gt_0.9": 0.12295081967213115, "calib/gap": 0.0005130549629177583, "calib/mean_conf": 0.5633196721311475, "calib/mu_c": 0.5637254901960783, "calib/mu_w": 0.5632124352331606, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.3754918032786886, "calib/std_conf": 0.29898058400673316, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 876.0, "completions/max_terminated_length": 876.0, "completions/mean_length": 277.203125, "completions/mean_terminated_length": 278.29022216796875, "completions/min_length": 0.0, "completions/min_terminated_length": 78.0, "epoch": 0.04906666666666667, "grad_norm": 3.052894115447998, "learning_rate": 4.277777777777778e-06, "loss": 0.033, "num_tokens": 10667893.0, "reward": 0.8054818511009216, "reward_std": 0.37371695041656494, "rewards/accuracy_reward_step": 0.203125, "rewards/final_brier_reward_step": 0.5734898447990417, "rewards/format_reward_step": 0.91796875, "step": 46 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -2.4190948175184523e-08, "aux_brier/mean_r": 0.8653046786785126, "aux_brier/n_active_tok": 160.75, "aux_brier/n_step_records": 40.1875, "aux_brier/std_r": 0.1736625115154311, "calib/answer_extract_rate": 0.95703125, "calib/auroc": 0.48142076502732245, "calib/avg_num_step_conf": 5.10546875, "calib/ece": 0.3261825726141079, "calib/final_conf_rate": 0.94140625, "calib/format_rate": 0.9140625, "calib/frac_conf_gt_0.9": 0.13278008298755187, "calib/gap": -0.016099271402550186, "calib/mean_conf": 0.5146473029045643, "calib/mu_c": 0.502622950819672, "calib/mu_w": 0.5187222222222222, "calib/nonempty_final_conf_rate": 0.94140625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.29385892116182566, "calib/std_conf": 0.31811683323609485, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1123.0, "completions/max_terminated_length": 1123.0, "completions/mean_length": 273.125, "completions/mean_terminated_length": 274.19610595703125, "completions/min_length": 0.0, "completions/min_terminated_length": 64.0, "epoch": 0.050133333333333335, "grad_norm": 6.314430236816406, "learning_rate": 4.25e-06, "loss": 0.0111, "num_tokens": 10843789.0, "reward": 0.8415087461471558, "reward_std": 0.3953862190246582, "rewards/accuracy_reward_step": 0.23828125, "rewards/final_brier_reward_step": 0.584784746170044, "rewards/format_reward_step": 0.9140625, "step": 47 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -2.8517816302864318e-08, "aux_brier/mean_r": 0.8656607959419489, "aux_brier/n_active_tok": 146.5, "aux_brier/n_step_records": 36.625, "aux_brier/std_r": 0.16868824022822082, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5861617620509117, "calib/avg_num_step_conf": 4.6484375, "calib/ece": 0.30096385542168674, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.9296875, "calib/frac_conf_gt_0.9": 0.0963855421686747, "calib/gap": 0.09299602816392855, "calib/mean_conf": 0.480562248995984, "calib/mu_c": 0.551896551724138, "calib/mu_w": 0.45890052356020944, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.27429718875502007, "calib/std_conf": 0.31365907159420015, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2531.0, "completions/max_terminated_length": 2531.0, "completions/mean_length": 257.91015625, "completions/mean_terminated_length": 258.92156982421875, "completions/min_length": 0.0, "completions/min_terminated_length": 20.0, "epoch": 0.0512, "grad_norm": 3.1995794773101807, "learning_rate": 4.222222222222223e-06, "loss": 0.0603, "num_tokens": 11013502.0, "reward": 0.857565701007843, "reward_std": 0.35615259408950806, "rewards/accuracy_reward_step": 0.23046875, "rewards/final_brier_reward_step": 0.6490129232406616, "rewards/format_reward_step": 0.9296875, "step": 48 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -7.843927246620197e-08, "aux_brier/mean_r": 0.8787881899625063, "aux_brier/n_active_tok": 167.0, "aux_brier/n_step_records": 41.75, "aux_brier/std_r": 0.1510406262241304, "calib/answer_extract_rate": 0.95703125, "calib/auroc": 0.5664884291725105, "calib/avg_num_step_conf": 5.25, "calib/ece": 0.28739837398373985, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.08536585365853659, "calib/gap": 0.07013148667601687, "calib/mean_conf": 0.45544715447154477, "calib/mu_c": 0.5079032258064516, "calib/mu_w": 0.43777173913043477, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.2454065040650406, "calib/std_conf": 0.31157024820267054, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2091.0, "completions/max_terminated_length": 2091.0, "completions/mean_length": 296.5, "completions/mean_terminated_length": 297.6627502441406, "completions/min_length": 0.0, "completions/min_terminated_length": 44.0, "epoch": 0.05226666666666667, "grad_norm": 0.8690859079360962, "learning_rate": 4.194444444444445e-06, "loss": 0.0714, "num_tokens": 11193942.0, "reward": 0.877479076385498, "reward_std": 0.35691165924072266, "rewards/accuracy_reward_step": 0.2421875, "rewards/final_brier_reward_step": 0.658353865146637, "rewards/format_reward_step": 0.94140625, "step": 49 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 2.867901631453673e-07, "aux_brier/mean_r": 0.8880037181079388, "aux_brier/n_active_tok": 175.375, "aux_brier/n_step_records": 43.84375, "aux_brier/std_r": 0.1522318616625853, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5776731040979746, "calib/avg_num_step_conf": 5.5234375, "calib/ece": 0.30076612903225797, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.93359375, "calib/frac_conf_gt_0.9": 0.0967741935483871, "calib/gap": 0.07630051813471506, "calib/mean_conf": 0.47262096774193546, "calib/mu_c": 0.532, "calib/mu_w": 0.45569948186528497, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.2758064516129032, "calib/std_conf": 0.29482769962060823, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1813.0, "completions/max_terminated_length": 1813.0, "completions/mean_length": 288.8828125, "completions/mean_terminated_length": 290.0157165527344, "completions/min_length": 0.0, "completions/min_terminated_length": 52.0, "epoch": 0.05333333333333334, "grad_norm": 0.5157153606414795, "learning_rate": 4.166666666666667e-06, "loss": 0.0268, "num_tokens": 11373256.0, "reward": 0.8458638191223145, "reward_std": 0.36319294571876526, "rewards/accuracy_reward_step": 0.21484375, "rewards/final_brier_reward_step": 0.6568929553031921, "rewards/format_reward_step": 0.93359375, "step": 50 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -2.807621283795747e-07, "aux_brier/mean_r": 0.9186164885759354, "aux_brier/n_active_tok": 169.75, "aux_brier/n_step_records": 42.4375, "aux_brier/std_r": 0.1274417027598247, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.48785842293906806, "calib/avg_num_step_conf": 5.37109375, "calib/ece": 0.2792276422764227, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.9375, "calib/frac_conf_gt_0.9": 0.07317073170731707, "calib/gap": -0.014639784946236523, "calib/mean_conf": 0.405569105691057, "calib/mu_c": 0.3945, "calib/mu_w": 0.40913978494623654, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.22044715447154473, "calib/std_conf": 0.30039757074409956, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1231.0, "completions/max_terminated_length": 1231.0, "completions/mean_length": 269.765625, "completions/mean_terminated_length": 270.82354736328125, "completions/min_length": 0.0, "completions/min_terminated_length": 65.0, "epoch": 0.0544, "grad_norm": 0.6914809346199036, "learning_rate": 4.138888888888889e-06, "loss": -0.009, "num_tokens": 11551612.0, "reward": 0.8700729012489319, "reward_std": 0.3210793733596802, "rewards/accuracy_reward_step": 0.23828125, "rewards/final_brier_reward_step": 0.6521666049957275, "rewards/format_reward_step": 0.9375, "step": 51 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 1.3941258883432006e-07, "aux_brier/mean_r": 0.9089175015687943, "aux_brier/n_active_tok": 150.5, "aux_brier/n_step_records": 37.625, "aux_brier/std_r": 0.1273875687038526, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5205358513741748, "calib/avg_num_step_conf": 4.77734375, "calib/ece": 0.23066938775510204, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.9453125, "calib/frac_conf_gt_0.9": 0.053061224489795916, "calib/gap": 0.012674804237678416, "calib/mean_conf": 0.38451428571428564, "calib/mu_c": 0.39315384615384613, "calib/mu_w": 0.3804790419161677, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.14840816326530615, "calib/std_conf": 0.29177068994366806, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2846.0, "completions/max_terminated_length": 2846.0, "completions/mean_length": 273.01953125, "completions/mean_terminated_length": 273.01953125, "completions/min_length": 11.0, "completions/min_terminated_length": 11.0, "epoch": 0.055466666666666664, "grad_norm": 0.8431240320205688, "learning_rate": 4.111111111111111e-06, "loss": -0.0328, "num_tokens": 11729457.0, "reward": 0.9458637237548828, "reward_std": 0.4054407477378845, "rewards/accuracy_reward_step": 0.30859375, "rewards/final_brier_reward_step": 0.658454954624176, "rewards/format_reward_step": 0.9453125, "step": 52 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -1.481655851065522e-07, "aux_brier/mean_r": 0.9112457670271397, "aux_brier/n_active_tok": 169.25, "aux_brier/n_step_records": 42.3125, "aux_brier/std_r": 0.1347796720219776, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5619281045751634, "calib/avg_num_step_conf": 5.3359375, "calib/ece": 0.20561491935483875, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.07661290322580645, "calib/gap": 0.055010620915032726, "calib/mean_conf": 0.37051411290322583, "calib/mu_c": 0.41044117647058825, "calib/mu_w": 0.3554305555555555, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.15096774193548387, "calib/std_conf": 0.3098516209713415, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2074.0, "completions/max_terminated_length": 2074.0, "completions/mean_length": 287.16015625, "completions/mean_terminated_length": 287.16015625, "completions/min_length": 59.0, "completions/min_terminated_length": 59.0, "epoch": 0.05653333333333333, "grad_norm": 0.01299208402633667, "learning_rate": 4.083333333333334e-06, "loss": 0.0425, "num_tokens": 11908794.0, "reward": 0.9189203977584839, "reward_std": 0.3469610810279846, "rewards/accuracy_reward_step": 0.26953125, "rewards/final_brier_reward_step": 0.6834941506385803, "rewards/format_reward_step": 0.95703125, "step": 53 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -1.223923470905497e-07, "aux_brier/mean_r": 0.9280948247760534, "aux_brier/n_active_tok": 167.75, "aux_brier/n_step_records": 41.9375, "aux_brier/std_r": 0.10896814439911395, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5003038077234675, "calib/avg_num_step_conf": 5.29296875, "calib/ece": 0.2552569169960474, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.06719367588932806, "calib/gap": -0.003618012422360206, "calib/mean_conf": 0.3350197628458498, "calib/mu_c": 0.33271739130434785, "calib/mu_w": 0.33633540372670806, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.11332015810276677, "calib/std_conf": 0.29438167866217413, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 841.0, "completions/max_terminated_length": 841.0, "completions/mean_length": 266.45703125, "completions/mean_terminated_length": 267.5019836425781, "completions/min_length": 0.0, "completions/min_terminated_length": 48.0, "epoch": 0.0576, "grad_norm": 2.0802652835845947, "learning_rate": 4.055555555555556e-06, "loss": -0.039, "num_tokens": 12083239.0, "reward": 1.0002775192260742, "reward_std": 0.3455648422241211, "rewards/accuracy_reward_step": 0.359375, "rewards/final_brier_reward_step": 0.6495472192764282, "rewards/format_reward_step": 0.95703125, "step": 54 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 7.829771703216082e-08, "aux_brier/mean_r": 0.9359429646283388, "aux_brier/n_active_tok": 171.375, "aux_brier/n_step_records": 42.84375, "aux_brier/std_r": 0.1050397614017129, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5214094558429974, "calib/avg_num_step_conf": 5.4140625, "calib/ece": 0.21253012048192776, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.040160642570281124, "calib/gap": 0.008947368421052704, "calib/mean_conf": 0.31317269076305215, "calib/mu_c": 0.32000000000000006, "calib/mu_w": 0.31105263157894736, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.14437751004016067, "calib/std_conf": 0.2884900002046189, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2302.0, "completions/max_terminated_length": 2302.0, "completions/mean_length": 280.39453125, "completions/mean_terminated_length": 280.39453125, "completions/min_length": 36.0, "completions/min_terminated_length": 36.0, "epoch": 0.058666666666666666, "grad_norm": 2.2866714000701904, "learning_rate": 4.027777777777779e-06, "loss": 0.0621, "num_tokens": 12262844.0, "reward": 0.8989040851593018, "reward_std": 0.29433003067970276, "rewards/accuracy_reward_step": 0.2421875, "rewards/final_brier_reward_step": 0.7049914598464966, "rewards/format_reward_step": 0.9609375, "step": 55 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -2.3253928421729242e-07, "aux_brier/mean_r": 0.9470057226717472, "aux_brier/n_active_tok": 169.625, "aux_brier/n_step_records": 42.40625, "aux_brier/std_r": 0.10112929189926945, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5867724867724867, "calib/avg_num_step_conf": 5.390625, "calib/ece": 0.1947569721115538, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.0398406374501992, "calib/gap": 0.07703306878306881, "calib/mean_conf": 0.2745657370517928, "calib/mu_c": 0.3408571428571429, "calib/mu_w": 0.2638240740740741, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.1649402390438247, "calib/std_conf": 0.2853893143381528, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 807.0, "completions/max_terminated_length": 807.0, "completions/mean_length": 271.953125, "completions/mean_terminated_length": 273.0196228027344, "completions/min_length": 0.0, "completions/min_terminated_length": 42.0, "epoch": 0.05973333333333333, "grad_norm": 0.3527677357196808, "learning_rate": 4.000000000000001e-06, "loss": -0.0427, "num_tokens": 12439304.0, "reward": 0.8028984665870667, "reward_std": 0.27898672223091125, "rewards/accuracy_reward_step": 0.13671875, "rewards/final_brier_reward_step": 0.7584688067436218, "rewards/format_reward_step": 0.953125, "step": 56 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -2.6138612163606467e-08, "aux_brier/mean_r": 0.9579919520765543, "aux_brier/n_active_tok": 178.0, "aux_brier/n_step_records": 44.5, "aux_brier/std_r": 0.08026634107227437, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5528138528138529, "calib/avg_num_step_conf": 5.75, "calib/ece": 0.21583935742971888, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.05220883534136546, "calib/gap": 0.049861904761904774, "calib/mean_conf": 0.2892208835341365, "calib/mu_c": 0.32226190476190475, "calib/mu_w": 0.2724, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.083855421686747, "calib/std_conf": 0.2907348731602532, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2302.0, "completions/max_terminated_length": 2302.0, "completions/mean_length": 311.42578125, "completions/mean_terminated_length": 312.6470642089844, "completions/min_length": 0.0, "completions/min_terminated_length": 88.0, "epoch": 0.0608, "grad_norm": 0.45644813776016235, "learning_rate": 3.972222222222223e-06, "loss": 0.0414, "num_tokens": 12625821.0, "reward": 0.9746400713920593, "reward_std": 0.38103657960891724, "rewards/accuracy_reward_step": 0.328125, "rewards/final_brier_reward_step": 0.6798101663589478, "rewards/format_reward_step": 0.953125, "step": 57 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -4.4954024780441415e-07, "aux_brier/mean_r": 0.9655646402388811, "aux_brier/n_active_tok": 197.875, "aux_brier/n_step_records": 49.46875, "aux_brier/std_r": 0.06825497176032513, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5050349889059567, "calib/avg_num_step_conf": 6.2734375, "calib/ece": 0.2203585657370518, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.027888446215139442, "calib/gap": 0.012206007851169154, "calib/mean_conf": 0.21645418326693228, "calib/mu_c": 0.2256451612903226, "calib/mu_w": 0.21343915343915346, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0949003984063745, "calib/std_conf": 0.2682119057515582, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2204.0, "completions/max_terminated_length": 2204.0, "completions/mean_length": 329.15234375, "completions/mean_terminated_length": 330.4431457519531, "completions/min_length": 0.0, "completions/min_terminated_length": 59.0, "epoch": 0.06186666666666667, "grad_norm": 0.5769728422164917, "learning_rate": 3.944444444444445e-06, "loss": -0.0069, "num_tokens": 12816404.0, "reward": 0.9102965593338013, "reward_std": 0.25608858466148376, "rewards/accuracy_reward_step": 0.2421875, "rewards/final_brier_reward_step": 0.7271238565444946, "rewards/format_reward_step": 0.97265625, "step": 58 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -6.6069232862453475e-09, "aux_brier/mean_r": 0.9606976192444563, "aux_brier/n_active_tok": 196.375, "aux_brier/n_step_records": 49.09375, "aux_brier/std_r": 0.07091562316054478, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5233834586466165, "calib/avg_num_step_conf": 6.16796875, "calib/ece": 0.2352589641434263, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.027888446215139442, "calib/gap": 0.01575112781954885, "calib/mean_conf": 0.16625498007968129, "calib/mu_c": 0.17723684210526314, "calib/mu_w": 0.1614857142857143, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.049362549800796816, "calib/std_conf": 0.22161917446632423, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2365.0, "completions/max_terminated_length": 2365.0, "completions/mean_length": 334.375, "completions/mean_terminated_length": 334.375, "completions/min_length": 54.0, "completions/min_terminated_length": 54.0, "epoch": 0.06293333333333333, "grad_norm": 1.2409600019454956, "learning_rate": 3.916666666666667e-06, "loss": 0.0572, "num_tokens": 13008252.0, "reward": 0.9596570134162903, "reward_std": 0.29551172256469727, "rewards/accuracy_reward_step": 0.296875, "rewards/final_brier_reward_step": 0.7058156132698059, "rewards/format_reward_step": 0.97265625, "step": 59 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 4.016046108168414e-07, "aux_brier/mean_r": 0.9708010889589787, "aux_brier/n_active_tok": 201.875, "aux_brier/n_step_records": 50.46875, "aux_brier/std_r": 0.06634965338162147, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.558695652173913, "calib/avg_num_step_conf": 6.53125, "calib/ece": 0.2287745901639344, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.020491803278688523, "calib/gap": 0.003124999999999989, "calib/mean_conf": 0.1261434426229508, "calib/mu_c": 0.1285, "calib/mu_w": 0.12537500000000001, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.05450819672131147, "calib/std_conf": 0.20670117794825824, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2110.0, "completions/max_terminated_length": 2110.0, "completions/mean_length": 335.45703125, "completions/mean_terminated_length": 336.7725830078125, "completions/min_length": 0.0, "completions/min_terminated_length": 95.0, "epoch": 0.064, "grad_norm": 0.9692597389221191, "learning_rate": 3.88888888888889e-06, "loss": 0.0557, "num_tokens": 13202985.0, "reward": 0.8876397013664246, "reward_std": 0.32324281334877014, "rewards/accuracy_reward_step": 0.23828125, "rewards/final_brier_reward_step": 0.7146212458610535, "rewards/format_reward_step": 0.94140625, "step": 60 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -5.036644992739703e-07, "aux_brier/mean_r": 0.9669798351824284, "aux_brier/n_active_tok": 169.375, "aux_brier/n_step_records": 42.34375, "aux_brier/std_r": 0.07128880363598, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.49022843460816673, "calib/avg_num_step_conf": 5.30859375, "calib/ece": 0.3242913385826772, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.003937007874015748, "calib/gap": -0.007181129877693662, "calib/mean_conf": 0.11185039370078742, "calib/mu_c": 0.10752475247524752, "calib/mu_w": 0.11470588235294119, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.01925196850393701, "calib/std_conf": 0.1728686898115672, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1719.0, "completions/max_terminated_length": 1719.0, "completions/mean_length": 271.2578125, "completions/mean_terminated_length": 271.2578125, "completions/min_length": 63.0, "completions/min_terminated_length": 63.0, "epoch": 0.06506666666666666, "grad_norm": 0.2882072925567627, "learning_rate": 3.861111111111112e-06, "loss": 0.0092, "num_tokens": 13376491.0, "reward": 1.0456879138946533, "reward_std": 0.2792099118232727, "rewards/accuracy_reward_step": 0.39453125, "rewards/final_brier_reward_step": 0.6358765363693237, "rewards/format_reward_step": 0.984375, "step": 61 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 2.951955654501015e-07, "aux_brier/mean_r": 0.967688538134098, "aux_brier/n_active_tok": 200.0, "aux_brier/n_step_records": 50.0, "aux_brier/std_r": 0.07479318812693236, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.4486486486486486, "calib/avg_num_step_conf": 6.41796875, "calib/ece": 0.2330241935483871, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.012096774193548387, "calib/gap": -0.03152895752895754, "calib/mean_conf": 0.08923387096774194, "calib/mu_c": 0.06571428571428571, "calib/mu_w": 0.09724324324324325, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.03411290322580646, "calib/std_conf": 0.17480812563562775, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2620.0, "completions/max_terminated_length": 2620.0, "completions/mean_length": 346.83203125, "completions/mean_terminated_length": 349.56298828125, "completions/min_length": 0.0, "completions/min_terminated_length": 59.0, "epoch": 0.06613333333333334, "grad_norm": 0.714409351348877, "learning_rate": 3.833333333333334e-06, "loss": 0.013, "num_tokens": 13572360.0, "reward": 0.9079388380050659, "reward_std": 0.23887741565704346, "rewards/accuracy_reward_step": 0.25, "rewards/final_brier_reward_step": 0.7098804712295532, "rewards/format_reward_step": 0.9609375, "step": 62 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 9.255839789890086e-07, "aux_brier/mean_r": 0.9725098609924316, "aux_brier/n_active_tok": 186.625, "aux_brier/n_step_records": 46.65625, "aux_brier/std_r": 0.05360891842428828, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5729949874686717, "calib/avg_num_step_conf": 5.87109375, "calib/ece": 0.29992156862745095, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.011764705882352941, "calib/gap": 0.003997493734335833, "calib/mean_conf": 0.07184313725490196, "calib/mu_c": 0.07452380952380952, "calib/mu_w": 0.07052631578947369, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.021176470588235293, "calib/std_conf": 0.1595709163815998, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2829.0, "completions/max_terminated_length": 2829.0, "completions/mean_length": 333.2734375, "completions/mean_terminated_length": 333.2734375, "completions/min_length": 75.0, "completions/min_terminated_length": 75.0, "epoch": 0.0672, "grad_norm": 0.4135409891605377, "learning_rate": 3.8055555555555556e-06, "loss": 0.0482, "num_tokens": 13766318.0, "reward": 0.9836273789405823, "reward_std": 0.3039664328098297, "rewards/accuracy_reward_step": 0.328125, "rewards/final_brier_reward_step": 0.6766968965530396, "rewards/format_reward_step": 0.97265625, "step": 63 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -4.198912289532508e-06, "aux_brier/mean_r": 0.9783938154578209, "aux_brier/n_active_tok": 214.375, "aux_brier/n_step_records": 53.59375, "aux_brier/std_r": 0.03761998387381027, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.46738816738816735, "calib/avg_num_step_conf": 6.70703125, "calib/ece": 0.33815261044176703, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.012048192771084338, "calib/gap": -0.022556277056277066, "calib/mean_conf": 0.05220883534136546, "calib/mu_c": 0.03726190476190476, "calib/mu_w": 0.059818181818181826, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.02650602409638554, "calib/std_conf": 0.14896005152405523, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2706.0, "completions/max_terminated_length": 2706.0, "completions/mean_length": 362.625, "completions/mean_terminated_length": 364.0470886230469, "completions/min_length": 0.0, "completions/min_terminated_length": 50.0, "epoch": 0.06826666666666667, "grad_norm": 6.378859996795654, "learning_rate": 3.777777777777778e-06, "loss": 0.0617, "num_tokens": 13962926.0, "reward": 0.9727111458778381, "reward_std": 0.3269904851913452, "rewards/accuracy_reward_step": 0.328125, "rewards/final_brier_reward_step": 0.6408445239067078, "rewards/format_reward_step": 0.96875, "step": 64 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -2.5248666553068233e-06, "aux_brier/mean_r": 0.9677706584334373, "aux_brier/n_active_tok": 186.125, "aux_brier/n_step_records": 46.53125, "aux_brier/std_r": 0.06618006560165668, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5391705069124424, "calib/avg_num_step_conf": 5.8203125, "calib/ece": 0.361185770750988, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.007905138339920948, "calib/gap": 0.029242922975641857, "calib/mean_conf": 0.04616600790513834, "calib/mu_c": 0.06408163265306122, "calib/mu_w": 0.03483870967741936, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.009999999999999998, "calib/std_conf": 0.14426015459655267, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1400.0, "completions/max_terminated_length": 1400.0, "completions/mean_length": 301.3984375, "completions/mean_terminated_length": 301.3984375, "completions/min_length": 88.0, "completions/min_terminated_length": 88.0, "epoch": 0.06933333333333333, "grad_norm": 1.8500511646270752, "learning_rate": 3.7500000000000005e-06, "loss": -0.0131, "num_tokens": 14145108.0, "reward": 1.0290679931640625, "reward_std": 0.26811254024505615, "rewards/accuracy_reward_step": 0.3828125, "rewards/final_brier_reward_step": 0.6240843534469604, "rewards/format_reward_step": 0.98046875, "step": 65 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 3.9905738385970224e-07, "aux_brier/mean_r": 0.9795597232878208, "aux_brier/n_active_tok": 222.625, "aux_brier/n_step_records": 55.65625, "aux_brier/std_r": 0.05493261116134818, "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.4988561842305933, "calib/avg_num_step_conf": 7.00390625, "calib/ece": 0.3066530612244898, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.012244897959183673, "calib/gap": -0.01000762543846271, "calib/mean_conf": 0.04412244897959184, "calib/mu_c": 0.03734177215189874, "calib/mu_w": 0.04734939759036145, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.014163265306122448, "calib/std_conf": 0.13148657612138567, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2459.0, "completions/max_terminated_length": 2459.0, "completions/mean_length": 399.7890625, "completions/mean_terminated_length": 399.7890625, "completions/min_length": 44.0, "completions/min_terminated_length": 44.0, "epoch": 0.0704, "grad_norm": 1.1181429624557495, "learning_rate": 3.7222222222222225e-06, "loss": 0.1014, "num_tokens": 14353806.0, "reward": 0.9407802820205688, "reward_std": 0.2905384302139282, "rewards/accuracy_reward_step": 0.30859375, "rewards/final_brier_reward_step": 0.6459336280822754, "rewards/format_reward_step": 0.94140625, "step": 66 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 1.60169882568173e-06, "aux_brier/mean_r": 0.9723675455898046, "aux_brier/n_active_tok": 197.75, "aux_brier/n_step_records": 49.4375, "aux_brier/std_r": 0.07824312518096121, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5168272447477585, "calib/avg_num_step_conf": 6.23828125, "calib/ece": 0.362094861660079, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.003912752575940051, "calib/mean_conf": 0.020118577075098815, "calib/mu_c": 0.017659574468085106, "calib/mu_w": 0.021572327044025157, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.005335968379446641, "calib/std_conf": 0.059487150859180285, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1137.0, "completions/max_terminated_length": 1137.0, "completions/mean_length": 332.15625, "completions/mean_terminated_length": 333.4588317871094, "completions/min_length": 0.0, "completions/min_terminated_length": 61.0, "epoch": 0.07146666666666666, "grad_norm": 0.48190027475357056, "learning_rate": 3.694444444444445e-06, "loss": -0.0753, "num_tokens": 14543846.0, "reward": 1.0149729251861572, "reward_std": 0.21961691975593567, "rewards/accuracy_reward_step": 0.37109375, "rewards/final_brier_reward_step": 0.6223918199539185, "rewards/format_reward_step": 0.9765625, "step": 67 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 7.720155723034416e-06, "aux_brier/mean_r": 0.9803924765437841, "aux_brier/n_active_tok": 189.375, "aux_brier/n_step_records": 47.34375, "aux_brier/std_r": 0.04812505295558367, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.49068928184770844, "calib/avg_num_step_conf": 5.953125, "calib/ece": 0.33516129032258063, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00477805846264886, "calib/mean_conf": 0.01725806451612903, "calib/mu_c": 0.014117647058823532, "calib/mu_w": 0.018895705521472392, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.004838709677419355, "calib/std_conf": 0.06424297011469138, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3014.0, "completions/max_terminated_length": 3014.0, "completions/mean_length": 354.30078125, "completions/mean_terminated_length": 354.30078125, "completions/min_length": 102.0, "completions/min_terminated_length": 102.0, "epoch": 0.07253333333333334, "grad_norm": 0.09472832083702087, "learning_rate": 3.6666666666666666e-06, "loss": 0.0899, "num_tokens": 14738635.0, "reward": 0.9719752073287964, "reward_std": 0.28729328513145447, "rewards/accuracy_reward_step": 0.33203125, "rewards/final_brier_reward_step": 0.637900710105896, "rewards/format_reward_step": 0.9609375, "step": 68 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 1.1624227527762265e-06, "aux_brier/mean_r": 0.9697585087269545, "aux_brier/n_active_tok": 211.125, "aux_brier/n_step_records": 52.78125, "aux_brier/std_r": 0.06189129345875699, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5250147362216327, "calib/avg_num_step_conf": 6.59765625, "calib/ece": 0.3134523809523809, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.011224580017683469, "calib/mean_conf": 0.013134920634920636, "calib/mu_c": 0.005384615384615385, "calib/mu_w": 0.016609195402298854, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.008531746031746033, "calib/std_conf": 0.07157589524797617, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2820.0, "completions/max_terminated_length": 2820.0, "completions/mean_length": 397.47265625, "completions/mean_terminated_length": 397.47265625, "completions/min_length": 104.0, "completions/min_terminated_length": 104.0, "epoch": 0.0736, "grad_norm": 0.30143827199935913, "learning_rate": 3.638888888888889e-06, "loss": 0.0565, "num_tokens": 14944884.0, "reward": 0.9643608331680298, "reward_std": 0.24725191295146942, "rewards/accuracy_reward_step": 0.3046875, "rewards/final_brier_reward_step": 0.6777558326721191, "rewards/format_reward_step": 0.98046875, "step": 69 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -1.056338494420661e-05, "aux_brier/mean_r": 0.9685336444526911, "aux_brier/n_active_tok": 223.875, "aux_brier/n_step_records": 55.96875, "aux_brier/std_r": 0.07177392703215446, "calib/answer_extract_rate": 0.94921875, "calib/auroc": 0.49909665763324296, "calib/avg_num_step_conf": 7.01171875, "calib/ece": 0.32551020408163267, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.002058867810900332, "calib/mean_conf": 0.007551020408163266, "calib/mu_c": 0.006172839506172839, "calib/mu_w": 0.008231707317073171, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0012244897959183675, "calib/std_conf": 0.028794175448779463, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2308.0, "completions/max_terminated_length": 2308.0, "completions/mean_length": 390.90234375, "completions/mean_terminated_length": 390.90234375, "completions/min_length": 58.0, "completions/min_terminated_length": 58.0, "epoch": 0.07466666666666667, "grad_norm": 0.5213795304298401, "learning_rate": 3.6111111111111115e-06, "loss": 0.1049, "num_tokens": 15151947.0, "reward": 0.9451004862785339, "reward_std": 0.2862257957458496, "rewards/accuracy_reward_step": 0.31640625, "rewards/final_brier_reward_step": 0.6319644451141357, "rewards/format_reward_step": 0.94140625, "step": 70 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -7.100035218726708e-06, "aux_brier/mean_r": 0.9685591030865908, "aux_brier/n_active_tok": 254.875, "aux_brier/n_step_records": 63.71875, "aux_brier/std_r": 0.06444620902402676, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.5225911223385059, "calib/avg_num_step_conf": 8.10546875, "calib/ece": 0.3372983870967741, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0075907614579574165, "calib/mean_conf": 0.012540322580645162, "calib/mu_c": 0.017529411764705884, "calib/mu_w": 0.009938650306748467, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.003548387096774194, "calib/std_conf": 0.07768058037655153, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2696.0, "completions/max_terminated_length": 2696.0, "completions/mean_length": 447.5234375, "completions/mean_terminated_length": 449.2784729003906, "completions/min_length": 0.0, "completions/min_terminated_length": 110.0, "epoch": 0.07573333333333333, "grad_norm": 1.28630793094635, "learning_rate": 3.5833333333333335e-06, "loss": 0.0771, "num_tokens": 15370921.0, "reward": 0.9682075381278992, "reward_std": 0.30183982849121094, "rewards/accuracy_reward_step": 0.33203125, "rewards/final_brier_reward_step": 0.6306425333023071, "rewards/format_reward_step": 0.95703125, "step": 71 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 2.966458784697812e-06, "aux_brier/mean_r": 0.9752984382212162, "aux_brier/n_active_tok": 248.125, "aux_brier/n_step_records": 62.03125, "aux_brier/std_r": 0.05803983546547897, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.5102653075977734, "calib/avg_num_step_conf": 7.921875, "calib/ece": 0.35239837398373985, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.008130081300813009, "calib/gap": 0.0054760355671221, "calib/mean_conf": 0.011747967479674798, "calib/mu_c": 0.015287356321839082, "calib/mu_w": 0.009811320754716982, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0052439024390243906, "calib/std_conf": 0.09002141511215923, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2295.0, "completions/max_terminated_length": 2295.0, "completions/mean_length": 407.671875, "completions/mean_terminated_length": 409.2705993652344, "completions/min_length": 0.0, "completions/min_terminated_length": 127.0, "epoch": 0.0768, "grad_norm": 0.23760710656642914, "learning_rate": 3.555555555555556e-06, "loss": 0.111, "num_tokens": 15579693.0, "reward": 0.9680535793304443, "reward_std": 0.2937406897544861, "rewards/accuracy_reward_step": 0.33984375, "rewards/final_brier_reward_step": 0.6144019365310669, "rewards/format_reward_step": 0.94921875, "step": 72 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 9.756778022174117e-06, "aux_brier/mean_r": 0.9794846288859844, "aux_brier/n_active_tok": 232.75, "aux_brier/n_step_records": 58.1875, "aux_brier/std_r": 0.0567249926907607, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5356845898004434, "calib/avg_num_step_conf": 7.296875, "calib/ece": 0.34512063492063494, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.002976385809312639, "calib/mean_conf": 0.004085714285714286, "calib/mu_c": 0.006022727272727273, "calib/mu_w": 0.0030463414634146345, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.012259998927248937, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2317.0, "completions/max_terminated_length": 2317.0, "completions/mean_length": 410.0078125, "completions/mean_terminated_length": 410.0078125, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 0.07786666666666667, "grad_norm": 0.8586456775665283, "learning_rate": 3.5277777777777784e-06, "loss": 0.0729, "num_tokens": 15791687.0, "reward": 0.9941590428352356, "reward_std": 0.3010594844818115, "rewards/accuracy_reward_step": 0.34375, "rewards/final_brier_reward_step": 0.6406984925270081, "rewards/format_reward_step": 0.98046875, "step": 73 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 1.7842377689758493e-06, "aux_brier/mean_r": 0.9822011701762676, "aux_brier/n_active_tok": 228.5, "aux_brier/n_step_records": 57.125, "aux_brier/std_r": 0.05300872658153821, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5037786774628881, "calib/avg_num_step_conf": 7.2109375, "calib/ece": 0.3751394422310757, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0025755735492577598, "calib/mean_conf": 0.0033466135458167334, "calib/mu_c": 0.004947368421052632, "calib/mu_w": 0.002371794871794872, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.017422434205114093, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2789.0, "completions/max_terminated_length": 2789.0, "completions/mean_length": 385.20703125, "completions/mean_terminated_length": 386.7176818847656, "completions/min_length": 0.0, "completions/min_terminated_length": 114.0, "epoch": 0.07893333333333333, "grad_norm": 1.0573453903198242, "learning_rate": 3.5e-06, "loss": 0.007, "num_tokens": 15994228.0, "reward": 1.0086534023284912, "reward_std": 0.26831555366516113, "rewards/accuracy_reward_step": 0.37109375, "rewards/final_brier_reward_step": 0.6049261689186096, "rewards/format_reward_step": 0.97265625, "step": 74 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 9.24522598882982e-06, "aux_brier/mean_r": 0.956381956115365, "aux_brier/n_active_tok": 220.75, "aux_brier/n_step_records": 55.1875, "aux_brier/std_r": 0.09521507852241484, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4974664224664225, "calib/avg_num_step_conf": 6.8984375, "calib/ece": 0.5018359375000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0006117216117216108, "calib/mean_conf": 0.007929687500000001, "calib/mu_c": 0.00823076923076923, "calib/mu_w": 0.00761904761904762, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0009765625, "calib/std_conf": 0.03578378726675453, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1697.0, "completions/max_terminated_length": 1697.0, "completions/mean_length": 359.33984375, "completions/mean_terminated_length": 360.7490539550781, "completions/min_length": 0.0, "completions/min_terminated_length": 66.0, "epoch": 0.08, "grad_norm": 0.44701892137527466, "learning_rate": 3.4722222222222224e-06, "loss": -0.0068, "num_tokens": 16190971.0, "reward": 1.1326134204864502, "reward_std": 0.2723809480667114, "rewards/accuracy_reward_step": 0.5078125, "rewards/final_brier_reward_step": 0.49920350313186646, "rewards/format_reward_step": 1.0, "step": 75 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 4.750671296599451e-06, "aux_brier/mean_r": 0.9684503898024559, "aux_brier/n_active_tok": 262.75, "aux_brier/n_step_records": 65.6875, "aux_brier/std_r": 0.0701445845629678, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.49924092409240933, "calib/avg_num_step_conf": 8.2109375, "calib/ece": 0.39996015936254986, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0017320132013201315, "calib/mean_conf": 0.002430278884462151, "calib/mu_c": 0.003465346534653465, "calib/mu_w": 0.0017333333333333335, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.010063054172803267, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2425.0, "completions/max_terminated_length": 2425.0, "completions/mean_length": 426.3515625, "completions/mean_terminated_length": 426.3515625, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 0.08106666666666666, "grad_norm": 0.04451752081513405, "learning_rate": 3.444444444444445e-06, "loss": 0.0517, "num_tokens": 16403173.0, "reward": 1.036790132522583, "reward_std": 0.2646830976009369, "rewards/accuracy_reward_step": 0.40234375, "rewards/final_brier_reward_step": 0.584660530090332, "rewards/format_reward_step": 0.9765625, "step": 76 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 3.199869627601437e-06, "aux_brier/mean_r": 0.9572847075760365, "aux_brier/n_active_tok": 260.5, "aux_brier/n_step_records": 65.125, "aux_brier/std_r": 0.0853509036860487, "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.5096405453794243, "calib/avg_num_step_conf": 8.46875, "calib/ece": 0.4321810699588477, "calib/final_conf_rate": 0.94921875, "calib/format_rate": 0.9453125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.004057980994353394, "calib/mean_conf": 0.004032921810699589, "calib/mu_c": 0.006320754716981131, "calib/mu_w": 0.002262773722627737, "calib/nonempty_final_conf_rate": 0.94921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.022152458699712014, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2907.0, "completions/max_terminated_length": 2907.0, "completions/mean_length": 425.5390625, "completions/mean_terminated_length": 430.5849914550781, "completions/min_length": 0.0, "completions/min_terminated_length": 91.0, "epoch": 0.08213333333333334, "grad_norm": 0.919904887676239, "learning_rate": 3.416666666666667e-06, "loss": 0.1436, "num_tokens": 16616775.0, "reward": 1.0255446434020996, "reward_std": 0.3390381634235382, "rewards/accuracy_reward_step": 0.41796875, "rewards/final_brier_reward_step": 0.5396785140037537, "rewards/format_reward_step": 0.9453125, "step": 77 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -1.258725189856591e-06, "aux_brier/mean_r": 0.9674650076776743, "aux_brier/n_active_tok": 270.0, "aux_brier/n_step_records": 67.5, "aux_brier/std_r": 0.07095844625837344, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.4753264144626715, "calib/avg_num_step_conf": 8.4375, "calib/ece": 0.4134677419354839, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0011797790425175764, "calib/mean_conf": 0.0018548387096774192, "calib/mu_c": 0.0011650485436893203, "calib/mu_w": 0.0023448275862068967, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.006073067923730435, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2447.0, "completions/max_terminated_length": 2447.0, "completions/mean_length": 460.56640625, "completions/mean_terminated_length": 462.3725891113281, "completions/min_length": 0.0, "completions/min_terminated_length": 109.0, "epoch": 0.0832, "grad_norm": 3.8983356952667236, "learning_rate": 3.3888888888888893e-06, "loss": 0.0699, "num_tokens": 16842704.0, "reward": 1.0324511528015137, "reward_std": 0.2954614460468292, "rewards/accuracy_reward_step": 0.40625, "rewards/final_brier_reward_step": 0.5673046708106995, "rewards/format_reward_step": 0.96875, "step": 78 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -3.6347339716982785e-06, "aux_brier/mean_r": 0.9604005198925734, "aux_brier/n_active_tok": 254.25, "aux_brier/n_step_records": 63.5625, "aux_brier/std_r": 0.08153873514288534, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5135981912144703, "calib/avg_num_step_conf": 7.97265625, "calib/ece": 0.47927710843373494, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0027267441860465112, "calib/mean_conf": 0.004337349397590362, "calib/mu_c": 0.00575, "calib/mu_w": 0.0030232558139534887, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0008433734939759035, "calib/std_conf": 0.029090335872366237, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2424.0, "completions/max_terminated_length": 2424.0, "completions/mean_length": 442.6640625, "completions/mean_terminated_length": 444.4000244140625, "completions/min_length": 0.0, "completions/min_terminated_length": 107.0, "epoch": 0.08426666666666667, "grad_norm": 0.7313966751098633, "learning_rate": 3.3611111111111117e-06, "loss": 0.025, "num_tokens": 17062402.0, "reward": 1.0805904865264893, "reward_std": 0.24945639073848724, "rewards/accuracy_reward_step": 0.47265625, "rewards/final_brier_reward_step": 0.5020492076873779, "rewards/format_reward_step": 0.96484375, "step": 79 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 2.037131247800694e-06, "aux_brier/mean_r": 0.9589800387620926, "aux_brier/n_active_tok": 252.75, "aux_brier/n_step_records": 63.1875, "aux_brier/std_r": 0.09060828908786789, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.534564393939394, "calib/avg_num_step_conf": 8.1328125, "calib/ece": 0.4335590551181102, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0021386363636363634, "calib/mean_conf": 0.003448818897637796, "calib/mu_c": 0.002236363636363637, "calib/mu_w": 0.004375, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.001968503937007874, "calib/std_conf": 0.03165480860870613, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1865.0, "completions/max_terminated_length": 1865.0, "completions/mean_length": 398.8203125, "completions/mean_terminated_length": 400.38433837890625, "completions/min_length": 0.0, "completions/min_terminated_length": 144.0, "epoch": 0.08533333333333333, "grad_norm": 0.26339295506477356, "learning_rate": 3.3333333333333333e-06, "loss": 0.018, "num_tokens": 17266660.0, "reward": 1.063705563545227, "reward_std": 0.2721478343009949, "rewards/accuracy_reward_step": 0.4296875, "rewards/final_brier_reward_step": 0.5595096349716187, "rewards/format_reward_step": 0.98828125, "step": 80 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 1.7928283943827172e-05, "aux_brier/mean_r": 0.9629630520939827, "aux_brier/n_active_tok": 270.375, "aux_brier/n_step_records": 67.59375, "aux_brier/std_r": 0.08666157643642691, "calib/answer_extract_rate": 0.9375, "calib/auroc": 0.5151729004940014, "calib/avg_num_step_conf": 8.63671875, "calib/ece": 0.455397489539749, "calib/final_conf_rate": 0.93359375, "calib/format_rate": 0.92578125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0005765702187720536, "calib/mean_conf": 0.0015062761506276152, "calib/mu_c": 0.0011926605504587156, "calib/mu_w": 0.0017692307692307693, "calib/nonempty_final_conf_rate": 0.93359375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.00041841004184100416, "calib/std_conf": 0.007447933802881778, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2852.0, "completions/max_terminated_length": 2852.0, "completions/mean_length": 463.109375, "completions/mean_terminated_length": 468.6007995605469, "completions/min_length": 0.0, "completions/min_terminated_length": 103.0, "epoch": 0.0864, "grad_norm": 0.11291259527206421, "learning_rate": 3.3055555555555558e-06, "loss": 0.1083, "num_tokens": 17491464.0, "reward": 1.018804907798767, "reward_std": 0.3258718252182007, "rewards/accuracy_reward_step": 0.4296875, "rewards/final_brier_reward_step": 0.5049070119857788, "rewards/format_reward_step": 0.92578125, "step": 81 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -2.0135010946116527e-05, "aux_brier/mean_r": 0.9712448623031378, "aux_brier/n_active_tok": 230.125, "aux_brier/n_step_records": 57.53125, "aux_brier/std_r": 0.05702007134186715, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4574626865671642, "calib/avg_num_step_conf": 7.203125, "calib/ece": 0.47137795275590555, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0022437810945273636, "calib/mean_conf": 0.0018503937007874017, "calib/mu_c": 0.0006666666666666666, "calib/mu_w": 0.00291044776119403, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0003937007874015748, "calib/std_conf": 0.00833130774977, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2286.0, "completions/max_terminated_length": 2286.0, "completions/mean_length": 396.9140625, "completions/mean_terminated_length": 396.9140625, "completions/min_length": 94.0, "completions/min_terminated_length": 94.0, "epoch": 0.08746666666666666, "grad_norm": 0.4578200578689575, "learning_rate": 3.277777777777778e-06, "loss": 0.0324, "num_tokens": 17698626.0, "reward": 1.092911720275879, "reward_std": 0.28024643659591675, "rewards/accuracy_reward_step": 0.46875, "rewards/final_brier_reward_step": 0.5200843811035156, "rewards/format_reward_step": 0.98828125, "step": 82 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 4.949938222531536e-06, "aux_brier/mean_r": 0.9571703709661961, "aux_brier/n_active_tok": 276.875, "aux_brier/n_step_records": 69.21875, "aux_brier/std_r": 0.0937192927463002, "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.49286666666666673, "calib/avg_num_step_conf": 8.65625, "calib/ece": 0.4894285714285714, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.9453125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0016500000000000002, "calib/mean_conf": 0.001591836734693878, "calib/mu_c": 0.00075, "calib/mu_w": 0.0024000000000000002, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0006122448979591838, "calib/std_conf": 0.010667360799294454, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2430.0, "completions/max_terminated_length": 2430.0, "completions/mean_length": 482.1484375, "completions/mean_terminated_length": 482.1484375, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 0.08853333333333334, "grad_norm": 0.9561845064163208, "learning_rate": 3.2500000000000002e-06, "loss": 0.098, "num_tokens": 17929320.0, "reward": 1.0655776262283325, "reward_std": 0.2542036175727844, "rewards/accuracy_reward_step": 0.47265625, "rewards/final_brier_reward_step": 0.48106053471565247, "rewards/format_reward_step": 0.9453125, "step": 83 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 7.68112618171335e-06, "aux_brier/mean_r": 0.9445101730525494, "aux_brier/n_active_tok": 261.375, "aux_brier/n_step_records": 65.34375, "aux_brier/std_r": 0.09457180820754729, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.5009433962264151, "calib/avg_num_step_conf": 8.53125, "calib/ece": 0.4324390243902439, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.005222371967654987, "calib/mean_conf": 0.0038211382113821145, "calib/mu_c": 0.0008490566037735849, "calib/mu_w": 0.006071428571428572, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.002682926829268293, "calib/std_conf": 0.04235007655329952, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2562.0, "completions/max_terminated_length": 2562.0, "completions/mean_length": 422.9296875, "completions/mean_terminated_length": 427.9446716308594, "completions/min_length": 0.0, "completions/min_terminated_length": 106.0, "epoch": 0.0896, "grad_norm": 0.4514802098274231, "learning_rate": 3.2222222222222227e-06, "loss": 0.0611, "num_tokens": 18143510.0, "reward": 1.028487205505371, "reward_std": 0.24244725704193115, "rewards/accuracy_reward_step": 0.4140625, "rewards/final_brier_reward_step": 0.543635904788971, "rewards/format_reward_step": 0.95703125, "step": 84 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -6.0621015596229455e-06, "aux_brier/mean_r": 0.9580962657928467, "aux_brier/n_active_tok": 276.375, "aux_brier/n_step_records": 69.09375, "aux_brier/std_r": 0.08610973766053576, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5192643894566971, "calib/avg_num_step_conf": 8.734375, "calib/ece": 0.420080971659919, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.000979020979020979, "calib/mean_conf": 0.0009716599190283401, "calib/mu_c": 0.0015384615384615385, "calib/mu_w": 0.0005594405594405594, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.004573644627634672, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2549.0, "completions/max_terminated_length": 2549.0, "completions/mean_length": 450.703125, "completions/mean_terminated_length": 452.4706115722656, "completions/min_length": 0.0, "completions/min_terminated_length": 119.0, "epoch": 0.09066666666666667, "grad_norm": 0.9802902340888977, "learning_rate": 3.1944444444444443e-06, "loss": 0.0753, "num_tokens": 18366714.0, "reward": 1.028627634048462, "reward_std": 0.28055432438850403, "rewards/accuracy_reward_step": 0.40625, "rewards/final_brier_reward_step": 0.559822678565979, "rewards/format_reward_step": 0.96484375, "step": 85 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 3.481456055581811e-05, "aux_brier/mean_r": 0.9600542802363634, "aux_brier/n_active_tok": 246.5, "aux_brier/n_step_records": 61.625, "aux_brier/std_r": 0.09055225767588126, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5300671140939598, "calib/avg_num_step_conf": 7.703125, "calib/ece": 0.40080321285140563, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.000997315436241611, "calib/mean_conf": 0.0008032128514056225, "calib/mu_c": 0.0014000000000000002, "calib/mu_w": 0.00040268456375838925, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0039267169675760065, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2508.0, "completions/max_terminated_length": 2508.0, "completions/mean_length": 432.52734375, "completions/mean_terminated_length": 432.52734375, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 0.09173333333333333, "grad_norm": 0.029200656339526176, "learning_rate": 3.1666666666666667e-06, "loss": 0.0704, "num_tokens": 18582953.0, "reward": 1.0305429697036743, "reward_std": 0.29986464977264404, "rewards/accuracy_reward_step": 0.3984375, "rewards/final_brier_reward_step": 0.5831093788146973, "rewards/format_reward_step": 0.97265625, "step": 86 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -1.039499872751426e-05, "aux_brier/mean_r": 0.9470968414098024, "aux_brier/n_active_tok": 234.125, "aux_brier/n_step_records": 58.53125, "aux_brier/std_r": 0.11808075748695046, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.46828880407124684, "calib/avg_num_step_conf": 7.44921875, "calib/ece": 0.52398406374502, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.008111959287531808, "calib/mean_conf": 0.005099601593625498, "calib/mu_c": 0.0012213740458015267, "calib/mu_w": 0.009333333333333334, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0035856573705179283, "calib/std_conf": 0.05687312047453168, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2372.0, "completions/max_terminated_length": 2372.0, "completions/mean_length": 396.2265625, "completions/mean_terminated_length": 397.7804260253906, "completions/min_length": 0.0, "completions/min_terminated_length": 116.0, "epoch": 0.0928, "grad_norm": 0.5480499267578125, "learning_rate": 3.138888888888889e-06, "loss": 0.0713, "num_tokens": 18789883.0, "reward": 1.1157242059707642, "reward_std": 0.30910974740982056, "rewards/accuracy_reward_step": 0.51171875, "rewards/final_brier_reward_step": 0.4628968834877014, "rewards/format_reward_step": 0.9765625, "step": 87 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -6.344828712784301e-06, "aux_brier/mean_r": 0.9597196727991104, "aux_brier/n_active_tok": 269.0, "aux_brier/n_step_records": 67.25, "aux_brier/std_r": 0.0984702772698256, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.47521482621521094, "calib/avg_num_step_conf": 8.4375, "calib/ece": 0.4520318725099602, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.005895216108759781, "calib/mean_conf": 0.005099601593625498, "calib/mu_c": 0.001858407079646018, "calib/mu_w": 0.007753623188405799, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.003466135458167331, "calib/std_conf": 0.044501332177568846, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2276.0, "completions/max_terminated_length": 2276.0, "completions/mean_length": 458.59375, "completions/mean_terminated_length": 460.3921813964844, "completions/min_length": 0.0, "completions/min_terminated_length": 83.0, "epoch": 0.09386666666666667, "grad_norm": 0.8200100064277649, "learning_rate": 3.1111111111111116e-06, "loss": 0.0694, "num_tokens": 19017131.0, "reward": 1.0585120916366577, "reward_std": 0.2788357436656952, "rewards/accuracy_reward_step": 0.44140625, "rewards/final_brier_reward_step": 0.5309234261512756, "rewards/format_reward_step": 0.96875, "step": 88 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 9.938230941586301e-06, "aux_brier/mean_r": 0.9528448935598135, "aux_brier/n_active_tok": 265.5, "aux_brier/n_step_records": 66.375, "aux_brier/std_r": 0.08498656061055954, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.4878891572732153, "calib/avg_num_step_conf": 8.546875, "calib/ece": 0.4384105691056911, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.002736714975845411, "calib/mean_conf": 0.0026463414634146343, "calib/mu_c": 0.0011111111111111111, "calib/mu_w": 0.0038478260869565218, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0010162601626016261, "calib/std_conf": 0.01693822248419394, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2491.0, "completions/max_terminated_length": 2491.0, "completions/mean_length": 452.625, "completions/mean_terminated_length": 454.4000244140625, "completions/min_length": 0.0, "completions/min_terminated_length": 113.0, "epoch": 0.09493333333333333, "grad_norm": 0.6837942004203796, "learning_rate": 3.0833333333333336e-06, "loss": 0.0745, "num_tokens": 19241891.0, "reward": 1.0343434810638428, "reward_std": 0.25882071256637573, "rewards/accuracy_reward_step": 0.421875, "rewards/final_brier_reward_step": 0.5358113050460815, "rewards/format_reward_step": 0.95703125, "step": 89 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 5.856085002697853e-06, "aux_brier/mean_r": 0.9318591877818108, "aux_brier/n_active_tok": 241.375, "aux_brier/n_step_records": 60.34375, "aux_brier/std_r": 0.12055842822337581, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.501918699186992, "calib/avg_num_step_conf": 7.5625, "calib/ece": 0.49458870967741936, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00016741463414634184, "calib/mean_conf": 0.001379032258064516, "calib/mu_c": 0.0014634146341463417, "calib/mu_w": 0.0012959999999999998, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.005053093498262744, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2501.0, "completions/max_terminated_length": 2501.0, "completions/mean_length": 422.40234375, "completions/mean_terminated_length": 424.058837890625, "completions/min_length": 0.0, "completions/min_terminated_length": 48.0, "epoch": 0.096, "grad_norm": 0.4092867076396942, "learning_rate": 3.055555555555556e-06, "loss": 0.0532, "num_tokens": 19453346.0, "reward": 1.08530592918396, "reward_std": 0.25320422649383545, "rewards/accuracy_reward_step": 0.48046875, "rewards/final_brier_reward_step": 0.48966091871261597, "rewards/format_reward_step": 0.96484375, "step": 90 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 1.0388758110435603e-06, "aux_brier/mean_r": 0.9685104470700026, "aux_brier/n_active_tok": 238.125, "aux_brier/n_step_records": 59.53125, "aux_brier/std_r": 0.07888051881855063, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5183712121212122, "calib/avg_num_step_conf": 7.5390625, "calib/ece": 0.5226190476190476, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0003636363636363637, "calib/mean_conf": 0.0011904761904761906, "calib/mu_c": 0.0013636363636363637, "calib/mu_w": 0.001, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0034748855998396246, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1556.0, "completions/max_terminated_length": 1556.0, "completions/mean_length": 394.4375, "completions/mean_terminated_length": 394.4375, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 0.09706666666666666, "grad_norm": 0.44405147433280945, "learning_rate": 3.0277777777777776e-06, "loss": 0.0348, "num_tokens": 19662034.0, "reward": 1.1292545795440674, "reward_std": 0.27326512336730957, "rewards/accuracy_reward_step": 0.51953125, "rewards/final_brier_reward_step": 0.4701429605484009, "rewards/format_reward_step": 0.984375, "step": 91 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 2.2956444874044912e-05, "aux_brier/mean_r": 0.9815721902996302, "aux_brier/n_active_tok": 228.0, "aux_brier/n_step_records": 57.0, "aux_brier/std_r": 0.051906031240605444, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.4721227621483376, "calib/avg_num_step_conf": 7.2421875, "calib/ece": 0.4571314741035856, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00030690537084398985, "calib/mean_conf": 0.0010358565737051792, "calib/mu_c": 0.0008695652173913044, "calib/mu_w": 0.0011764705882352942, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.003750525748450138, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2822.0, "completions/max_terminated_length": 2822.0, "completions/mean_length": 396.76953125, "completions/mean_terminated_length": 398.32550048828125, "completions/min_length": 0.0, "completions/min_terminated_length": 114.0, "epoch": 0.09813333333333334, "grad_norm": 0.23380407691001892, "learning_rate": 3e-06, "loss": 0.0669, "num_tokens": 19870327.0, "reward": 1.070504069328308, "reward_std": 0.2657574415206909, "rewards/accuracy_reward_step": 0.44921875, "rewards/final_brier_reward_step": 0.532016396522522, "rewards/format_reward_step": 0.9765625, "step": 92 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -1.3357439757832879e-05, "aux_brier/mean_r": 0.9722327664494514, "aux_brier/n_active_tok": 291.75, "aux_brier/n_step_records": 72.9375, "aux_brier/std_r": 0.0773887043853847, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.4800039401103231, "calib/avg_num_step_conf": 9.17578125, "calib/ece": 0.43281124497991963, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0012509850275807723, "calib/mean_conf": 0.0017269076305220885, "calib/mu_c": 0.0010185185185185184, "calib/mu_w": 0.0022695035460992908, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0004016064257028112, "calib/std_conf": 0.008005377138399793, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2615.0, "completions/max_terminated_length": 2615.0, "completions/mean_length": 493.62890625, "completions/mean_terminated_length": 495.5647277832031, "completions/min_length": 0.0, "completions/min_terminated_length": 105.0, "epoch": 0.0992, "grad_norm": 0.08356130123138428, "learning_rate": 2.9722222222222225e-06, "loss": 0.1167, "num_tokens": 20102472.0, "reward": 1.0431673526763916, "reward_std": 0.31661662459373474, "rewards/accuracy_reward_step": 0.421875, "rewards/final_brier_reward_step": 0.5476691126823425, "rewards/format_reward_step": 0.96875, "step": 93 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 8.124847299367799e-06, "aux_brier/mean_r": 0.9563562832772732, "aux_brier/n_active_tok": 220.25, "aux_brier/n_step_records": 55.0625, "aux_brier/std_r": 0.09532306047367456, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.4649954832881663, "calib/avg_num_step_conf": 7.140625, "calib/ece": 0.49257028112449797, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.001794425087108014, "calib/mean_conf": 0.002208835341365462, "calib/mu_c": 0.0013008130081300813, "calib/mu_w": 0.0030952380952380953, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0004016064257028112, "calib/std_conf": 0.008379823866953328, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2828.0, "completions/max_terminated_length": 2828.0, "completions/mean_length": 401.62890625, "completions/mean_terminated_length": 404.7913513183594, "completions/min_length": 0.0, "completions/min_terminated_length": 128.0, "epoch": 0.10026666666666667, "grad_norm": 0.19901172816753387, "learning_rate": 2.944444444444445e-06, "loss": 0.059, "num_tokens": 20313969.0, "reward": 1.0901379585266113, "reward_std": 0.2366088628768921, "rewards/accuracy_reward_step": 0.48046875, "rewards/final_brier_reward_step": 0.49336445331573486, "rewards/format_reward_step": 0.97265625, "step": 94 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -2.599527843066829e-06, "aux_brier/mean_r": 0.9509124830365181, "aux_brier/n_active_tok": 267.75, "aux_brier/n_step_records": 66.9375, "aux_brier/std_r": 0.11357305465116951, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5123377042826691, "calib/avg_num_step_conf": 8.5546875, "calib/ece": 0.5469999999999998, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0015405981525741228, "calib/mean_conf": 0.004200000000000001, "calib/mu_c": 0.0035036496350364962, "calib/mu_w": 0.005044247787610619, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0015999999999999999, "calib/std_conf": 0.023907321054438536, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3065.0, "completions/max_terminated_length": 3065.0, "completions/mean_length": 453.4296875, "completions/mean_terminated_length": 455.2078552246094, "completions/min_length": 0.0, "completions/min_terminated_length": 84.0, "epoch": 0.10133333333333333, "grad_norm": 0.2598918080329895, "learning_rate": 2.916666666666667e-06, "loss": 0.0535, "num_tokens": 20536175.0, "reward": 1.131653070449829, "reward_std": 0.26526156067848206, "rewards/accuracy_reward_step": 0.53515625, "rewards/final_brier_reward_step": 0.4406746029853821, "rewards/format_reward_step": 0.97265625, "step": 95 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 6.244284340650719e-06, "aux_brier/mean_r": 0.9431368261575699, "aux_brier/n_active_tok": 251.125, "aux_brier/n_step_records": 62.78125, "aux_brier/std_r": 0.09904260898474604, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.49042950513538747, "calib/avg_num_step_conf": 7.8515625, "calib/ece": 0.587831325301205, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0011624649859943979, "calib/mean_conf": 0.0033333333333333335, "calib/mu_c": 0.00380952380952381, "calib/mu_w": 0.002647058823529412, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0004016064257028112, "calib/std_conf": 0.02043044162796753, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2768.0, "completions/max_terminated_length": 2768.0, "completions/mean_length": 432.9140625, "completions/mean_terminated_length": 432.9140625, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 0.1024, "grad_norm": 0.08151677250862122, "learning_rate": 2.888888888888889e-06, "loss": 0.0891, "num_tokens": 20752817.0, "reward": 1.158216118812561, "reward_std": 0.2332727313041687, "rewards/accuracy_reward_step": 0.57421875, "rewards/final_brier_reward_step": 0.39848947525024414, "rewards/format_reward_step": 0.96875, "step": 96 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -4.802699897443752e-06, "aux_brier/mean_r": 0.9669178631156683, "aux_brier/n_active_tok": 246.5, "aux_brier/n_step_records": 61.625, "aux_brier/std_r": 0.08885958983046294, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.47009141494435613, "calib/avg_num_step_conf": 7.9453125, "calib/ece": 0.40748, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.004157392686804452, "calib/mean_conf": 0.004520000000000001, "calib/mu_c": 0.002058823529411765, "calib/mu_w": 0.006216216216216217, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.002, "calib/std_conf": 0.03199952499647455, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2117.0, "completions/max_terminated_length": 2117.0, "completions/mean_length": 385.70703125, "completions/mean_terminated_length": 390.2806396484375, "completions/min_length": 0.0, "completions/min_terminated_length": 102.0, "epoch": 0.10346666666666667, "grad_norm": 0.3933573067188263, "learning_rate": 2.861111111111111e-06, "loss": 0.0064, "num_tokens": 20956630.0, "reward": 1.0314052104949951, "reward_std": 0.3018680512905121, "rewards/accuracy_reward_step": 0.3984375, "rewards/final_brier_reward_step": 0.5787457227706909, "rewards/format_reward_step": 0.9765625, "step": 97 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -2.228004196469513e-06, "aux_brier/mean_r": 0.9387944806367159, "aux_brier/n_active_tok": 251.5, "aux_brier/n_step_records": 62.875, "aux_brier/std_r": 0.11519359098315363, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5200456323337679, "calib/avg_num_step_conf": 8.2578125, "calib/ece": 0.4731451612903225, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0015514993481095178, "calib/mean_conf": 0.002661290322580645, "calib/mu_c": 0.003474576271186441, "calib/mu_w": 0.0019230769230769232, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.008142765537158253, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 3006.0, "completions/max_terminated_length": 3006.0, "completions/mean_length": 424.65625, "completions/mean_terminated_length": 429.69171142578125, "completions/min_length": 0.0, "completions/min_terminated_length": 90.0, "epoch": 0.10453333333333334, "grad_norm": 0.3319334387779236, "learning_rate": 2.8333333333333335e-06, "loss": 0.0477, "num_tokens": 21171526.0, "reward": 1.073048710823059, "reward_std": 0.3056885302066803, "rewards/accuracy_reward_step": 0.4609375, "rewards/final_brier_reward_step": 0.5109445452690125, "rewards/format_reward_step": 0.96875, "step": 98 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 4.425479422920375e-06, "aux_brier/mean_r": 0.9761198349297047, "aux_brier/n_active_tok": 273.75, "aux_brier/n_step_records": 68.4375, "aux_brier/std_r": 0.06148804390841178, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5082608348631175, "calib/avg_num_step_conf": 8.7890625, "calib/ece": 0.2609842519685039, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0002578018995929442, "calib/mean_conf": 0.002795275590551181, "calib/mu_c": 0.0029850746268656717, "calib/mu_w": 0.0027272727272727275, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0075595677319624955, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1526.0, "completions/max_terminated_length": 1526.0, "completions/mean_length": 462.3671875, "completions/mean_terminated_length": 464.180419921875, "completions/min_length": 0.0, "completions/min_terminated_length": 125.0, "epoch": 0.1056, "grad_norm": 0.06440990418195724, "learning_rate": 2.805555555555556e-06, "loss": -0.0183, "num_tokens": 21395692.0, "reward": 0.9408042430877686, "reward_std": 0.2303566038608551, "rewards/accuracy_reward_step": 0.26171875, "rewards/final_brier_reward_step": 0.7319667339324951, "rewards/format_reward_step": 0.9921875, "step": 99 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 3.860278470879663e-06, "aux_brier/mean_r": 0.949141837656498, "aux_brier/n_active_tok": 254.75, "aux_brier/n_step_records": 63.6875, "aux_brier/std_r": 0.09752273821641211, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.4810764234108835, "calib/avg_num_step_conf": 7.97265625, "calib/ece": 0.4285483870967742, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0005037449459799831, "calib/mean_conf": 0.0029032258064516127, "calib/mu_c": 0.002616822429906542, "calib/mu_w": 0.0031205673758865253, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.006753039166276877, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2430.0, "completions/max_terminated_length": 2430.0, "completions/mean_length": 465.24609375, "completions/mean_terminated_length": 465.24609375, "completions/min_length": 106.0, "completions/min_terminated_length": 106.0, "epoch": 0.10666666666666667, "grad_norm": 0.07239007204771042, "learning_rate": 2.7777777777777783e-06, "loss": 0.0726, "num_tokens": 21622203.0, "reward": 1.0386197566986084, "reward_std": 0.3322659432888031, "rewards/accuracy_reward_step": 0.41796875, "rewards/final_brier_reward_step": 0.552916407585144, "rewards/format_reward_step": 0.96484375, "step": 100 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -4.725101696079204e-06, "aux_brier/mean_r": 0.9747386667877436, "aux_brier/n_active_tok": 335.5, "aux_brier/n_step_records": 83.875, "aux_brier/std_r": 0.06250320563481182, "calib/answer_extract_rate": 0.94921875, "calib/auroc": 0.5677235433180364, "calib/avg_num_step_conf": 10.54296875, "calib/ece": 0.4145679012345679, "calib/final_conf_rate": 0.94921875, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.008516896120150186, "calib/mean_conf": 0.007901234567901235, "calib/mu_c": 0.01284313725490196, "calib/mu_w": 0.004326241134751774, "calib/nonempty_final_conf_rate": 0.94921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0013580246913580249, "calib/std_conf": 0.05584478893460641, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2929.0, "completions/max_terminated_length": 2929.0, "completions/mean_length": 548.51953125, "completions/mean_terminated_length": 550.6705932617188, "completions/min_length": 0.0, "completions/min_terminated_length": 110.0, "epoch": 0.10773333333333333, "grad_norm": 0.11519607901573181, "learning_rate": 2.7500000000000004e-06, "loss": 0.1605, "num_tokens": 21869616.0, "reward": 1.006725549697876, "reward_std": 0.2990740239620209, "rewards/accuracy_reward_step": 0.3984375, "rewards/final_brier_reward_step": 0.5503398180007935, "rewards/format_reward_step": 0.94140625, "step": 101 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 2.4915509578837813e-06, "aux_brier/mean_r": 0.9620214179158211, "aux_brier/n_active_tok": 255.75, "aux_brier/n_step_records": 63.9375, "aux_brier/std_r": 0.08073153806253686, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5255687342018277, "calib/avg_num_step_conf": 7.9921875, "calib/ece": 0.5527599999999999, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0009663620454987355, "calib/mean_conf": 0.0032400000000000003, "calib/mu_c": 0.003669064748201439, "calib/mu_w": 0.0027027027027027033, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.01070992063462657, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2702.0, "completions/max_terminated_length": 2702.0, "completions/mean_length": 416.109375, "completions/mean_terminated_length": 417.7412109375, "completions/min_length": 0.0, "completions/min_terminated_length": 69.0, "epoch": 0.1088, "grad_norm": 0.3080579936504364, "learning_rate": 2.7222222222222224e-06, "loss": 0.0251, "num_tokens": 22082836.0, "reward": 1.1357409954071045, "reward_std": 0.25172191858291626, "rewards/accuracy_reward_step": 0.54296875, "rewards/final_brier_reward_step": 0.4335886836051941, "rewards/format_reward_step": 0.96875, "step": 102 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 6.1699593362773975e-06, "aux_brier/mean_r": 0.9732596483081579, "aux_brier/n_active_tok": 278.5, "aux_brier/n_step_records": 69.625, "aux_brier/std_r": 0.07219203443764854, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.47738842975206613, "calib/avg_num_step_conf": 8.70703125, "calib/ece": 0.5065447154471544, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.008130081300813009, "calib/gap": -0.0008800000000000006, "calib/mean_conf": 0.009552845528455287, "calib/mu_c": 0.009120000000000001, "calib/mu_w": 0.010000000000000002, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.003983739837398375, "calib/std_conf": 0.08889682548513114, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3056.0, "completions/max_terminated_length": 3056.0, "completions/mean_length": 528.35546875, "completions/mean_terminated_length": 528.35546875, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 0.10986666666666667, "grad_norm": 0.14490723609924316, "learning_rate": 2.6944444444444444e-06, "loss": 0.151, "num_tokens": 22322647.0, "reward": 1.085228443145752, "reward_std": 0.2544863224029541, "rewards/accuracy_reward_step": 0.48828125, "rewards/final_brier_reward_step": 0.47372615337371826, "rewards/format_reward_step": 0.95703125, "step": 103 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 2.8544163035548742e-05, "aux_brier/mean_r": 0.97101273201406, "aux_brier/n_active_tok": 246.0, "aux_brier/n_step_records": 61.5, "aux_brier/std_r": 0.07220473135703287, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.44815515610217593, "calib/avg_num_step_conf": 7.80859375, "calib/ece": 0.4094140625, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0051359192683696, "calib/mean_conf": 0.0046484375, "calib/mu_c": 0.0016190476190476191, "calib/mu_w": 0.006754966887417219, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.001953125, "calib/std_conf": 0.03168252915738568, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1650.0, "completions/max_terminated_length": 1650.0, "completions/mean_length": 423.01953125, "completions/mean_terminated_length": 424.678466796875, "completions/min_length": 0.0, "completions/min_terminated_length": 135.0, "epoch": 0.11093333333333333, "grad_norm": 0.277452290058136, "learning_rate": 2.666666666666667e-06, "loss": 0.0219, "num_tokens": 22537620.0, "reward": 1.0547630786895752, "reward_std": 0.2131832391023636, "rewards/accuracy_reward_step": 0.41015625, "rewards/final_brier_reward_step": 0.5862402319908142, "rewards/format_reward_step": 0.99609375, "step": 104 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -9.282561072521123e-06, "aux_brier/mean_r": 0.9753809086978436, "aux_brier/n_active_tok": 275.375, "aux_brier/n_step_records": 68.84375, "aux_brier/std_r": 0.066047161394863, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.5117991056071551, "calib/avg_num_step_conf": 8.76171875, "calib/ece": 0.38024193548387103, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.001104919160646715, "calib/mean_conf": 0.0036290322580645167, "calib/mu_c": 0.0029473684210526317, "calib/mu_w": 0.004052287581699347, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0004032258064516129, "calib/std_conf": 0.009986658634216891, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2797.0, "completions/max_terminated_length": 2797.0, "completions/mean_length": 465.19140625, "completions/mean_terminated_length": 468.8543395996094, "completions/min_length": 0.0, "completions/min_terminated_length": 102.0, "epoch": 0.112, "grad_norm": 0.14374728500843048, "learning_rate": 2.6388888888888893e-06, "loss": 0.0618, "num_tokens": 22762469.0, "reward": 0.9995492100715637, "reward_std": 0.27755358815193176, "rewards/accuracy_reward_step": 0.37109375, "rewards/final_brier_reward_step": 0.5919468402862549, "rewards/format_reward_step": 0.9609375, "step": 105 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 1.5305782265120915e-05, "aux_brier/mean_r": 0.9544075541198254, "aux_brier/n_active_tok": 236.875, "aux_brier/n_step_records": 59.21875, "aux_brier/std_r": 0.0973916576145939, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.48832083168838003, "calib/avg_num_step_conf": 7.40625, "calib/ece": 0.40422310756972113, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00017436504803263614, "calib/mean_conf": 0.002151394422310757, "calib/mu_c": 0.0022549019607843138, "calib/mu_w": 0.0020805369127516776, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.005219579676545758, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3039.0, "completions/max_terminated_length": 3039.0, "completions/mean_length": 428.83203125, "completions/mean_terminated_length": 428.83203125, "completions/min_length": 93.0, "completions/min_terminated_length": 93.0, "epoch": 0.11306666666666666, "grad_norm": 0.2856384515762329, "learning_rate": 2.6111111111111113e-06, "loss": 0.0761, "num_tokens": 22976834.0, "reward": 1.029738426208496, "reward_std": 0.23654037714004517, "rewards/accuracy_reward_step": 0.3984375, "rewards/final_brier_reward_step": 0.5798909664154053, "rewards/format_reward_step": 0.97265625, "step": 106 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 1.3012114725485802e-05, "aux_brier/mean_r": 0.9632170852273703, "aux_brier/n_active_tok": 256.875, "aux_brier/n_step_records": 64.21875, "aux_brier/std_r": 0.087117676856451, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5036398467432951, "calib/avg_num_step_conf": 8.1640625, "calib/ece": 0.5347808764940238, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 9.386973180076658e-05, "calib/mean_conf": 0.003067729083665339, "calib/mu_c": 0.0031111111111111114, "calib/mu_w": 0.003017241379310345, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.005332695739203866, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2352.0, "completions/max_terminated_length": 2352.0, "completions/mean_length": 429.0546875, "completions/mean_terminated_length": 432.4330749511719, "completions/min_length": 0.0, "completions/min_terminated_length": 154.0, "epoch": 0.11413333333333334, "grad_norm": 0.07876795530319214, "learning_rate": 2.5833333333333337e-06, "loss": 0.0294, "num_tokens": 23191288.0, "reward": 1.1316704750061035, "reward_std": 0.2527458667755127, "rewards/accuracy_reward_step": 0.52734375, "rewards/final_brier_reward_step": 0.4563691020011902, "rewards/format_reward_step": 0.98046875, "step": 107 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -1.589351832875785e-05, "aux_brier/mean_r": 0.9550943691283464, "aux_brier/n_active_tok": 262.875, "aux_brier/n_step_records": 65.71875, "aux_brier/std_r": 0.11002339723108889, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5033237747653807, "calib/avg_num_step_conf": 8.21875, "calib/ece": 0.5466666666666665, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00023005735140771639, "calib/mean_conf": 0.0035341365461847396, "calib/mu_c": 0.0034306569343065694, "calib/mu_w": 0.003660714285714286, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.009997983700212187, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2407.0, "completions/max_terminated_length": 2407.0, "completions/mean_length": 469.921875, "completions/mean_terminated_length": 469.921875, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 0.1152, "grad_norm": 0.24026797711849213, "learning_rate": 2.5555555555555557e-06, "loss": 0.0073, "num_tokens": 23414820.0, "reward": 1.1356562376022339, "reward_std": 0.2717841863632202, "rewards/accuracy_reward_step": 0.5390625, "rewards/final_brier_reward_step": 0.4410625100135803, "rewards/format_reward_step": 0.97265625, "step": 108 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -2.1035821004017352e-05, "aux_brier/mean_r": 0.9786872826516628, "aux_brier/n_active_tok": 254.875, "aux_brier/n_step_records": 63.71875, "aux_brier/std_r": 0.056553874298970186, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.4790539649030215, "calib/avg_num_step_conf": 8.0703125, "calib/ece": 0.42293172690763053, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0003898931257421828, "calib/mean_conf": 0.002771084337349397, "calib/mu_c": 0.0025471698113207547, "calib/mu_w": 0.0029370629370629375, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.00482128472239644, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3027.0, "completions/max_terminated_length": 3027.0, "completions/mean_length": 445.25390625, "completions/mean_terminated_length": 447.0000305175781, "completions/min_length": 0.0, "completions/min_terminated_length": 156.0, "epoch": 0.11626666666666667, "grad_norm": 0.11608666181564331, "learning_rate": 2.5277777777777778e-06, "loss": 0.066, "num_tokens": 23633405.0, "reward": 1.034699559211731, "reward_std": 0.24297651648521423, "rewards/accuracy_reward_step": 0.4140625, "rewards/final_brier_reward_step": 0.5528604984283447, "rewards/format_reward_step": 0.96484375, "step": 109 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -5.833705435942527e-07, "aux_brier/mean_r": 0.9681633003056049, "aux_brier/n_active_tok": 238.5, "aux_brier/n_step_records": 59.625, "aux_brier/std_r": 0.07569363817333397, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.4965189449725532, "calib/avg_num_step_conf": 7.453125, "calib/ece": 0.38418326693227095, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0010898379970544925, "calib/mean_conf": 0.003864541832669323, "calib/mu_c": 0.0031958762886597943, "calib/mu_w": 0.004285714285714287, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0007968127490039841, "calib/std_conf": 0.010049953801069364, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2790.0, "completions/max_terminated_length": 2790.0, "completions/mean_length": 422.7578125, "completions/mean_terminated_length": 422.7578125, "completions/min_length": 84.0, "completions/min_terminated_length": 84.0, "epoch": 0.11733333333333333, "grad_norm": 0.17305228114128113, "learning_rate": 2.5e-06, "loss": 0.0447, "num_tokens": 23846551.0, "reward": 1.014258623123169, "reward_std": 0.2644665241241455, "rewards/accuracy_reward_step": 0.37890625, "rewards/final_brier_reward_step": 0.5960972309112549, "rewards/format_reward_step": 0.97265625, "step": 110 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 7.416988005742864e-06, "aux_brier/mean_r": 0.9522415660321712, "aux_brier/n_active_tok": 240.75, "aux_brier/n_step_records": 60.1875, "aux_brier/std_r": 0.09247250770158644, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5390584246218049, "calib/avg_num_step_conf": 7.5390625, "calib/ece": 0.42847999999999997, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0013015127803860205, "calib/mean_conf": 0.0035200000000000006, "calib/mu_c": 0.00425925925925926, "calib/mu_w": 0.00295774647887324, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.006356854568102059, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2069.0, "completions/max_terminated_length": 2069.0, "completions/mean_length": 427.91015625, "completions/mean_terminated_length": 429.5882568359375, "completions/min_length": 0.0, "completions/min_terminated_length": 87.0, "epoch": 0.1184, "grad_norm": 0.4730086624622345, "learning_rate": 2.4722222222222226e-06, "loss": 0.0305, "num_tokens": 24063504.0, "reward": 1.0467839241027832, "reward_std": 0.24333319067955017, "rewards/accuracy_reward_step": 0.421875, "rewards/final_brier_reward_step": 0.5543234348297119, "rewards/format_reward_step": 0.97265625, "step": 111 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 5.184052189965982e-06, "aux_brier/mean_r": 0.9448889140039682, "aux_brier/n_active_tok": 252.625, "aux_brier/n_step_records": 63.15625, "aux_brier/std_r": 0.11967398602337198, "calib/answer_extract_rate": 0.94921875, "calib/auroc": 0.541993006993007, "calib/avg_num_step_conf": 8.25, "calib/ece": 0.4086831275720164, "calib/final_conf_rate": 0.94921875, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0005734265734265733, "calib/mean_conf": 0.003662551440329218, "calib/mu_c": 0.004, "calib/mu_w": 0.003426573426573427, "calib/nonempty_final_conf_rate": 0.94921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.000411522633744856, "calib/std_conf": 0.008663895382683614, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 2772.0, "completions/max_terminated_length": 2772.0, "completions/mean_length": 454.3828125, "completions/mean_terminated_length": 465.28802490234375, "completions/min_length": 0.0, "completions/min_terminated_length": 61.0, "epoch": 0.11946666666666667, "grad_norm": 0.12028834968805313, "learning_rate": 2.4444444444444447e-06, "loss": -0.0121, "num_tokens": 24287746.0, "reward": 1.0056431293487549, "reward_std": 0.26166829466819763, "rewards/accuracy_reward_step": 0.390625, "rewards/final_brier_reward_step": 0.5616347789764404, "rewards/format_reward_step": 0.94921875, "step": 112 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -1.1651130849388291e-05, "aux_brier/mean_r": 0.9504367671906948, "aux_brier/n_active_tok": 233.125, "aux_brier/n_step_records": 58.28125, "aux_brier/std_r": 0.1082596214264413, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5126140973630832, "calib/avg_num_step_conf": 7.43359375, "calib/ece": 0.45607142857142857, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0005197768762677489, "calib/mean_conf": 0.004246031746031746, "calib/mu_c": 0.003965517241379311, "calib/mu_w": 0.0044852941176470595, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.007755373465300932, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2835.0, "completions/max_terminated_length": 2835.0, "completions/mean_length": 389.9296875, "completions/mean_terminated_length": 393.0, "completions/min_length": 0.0, "completions/min_terminated_length": 86.0, "epoch": 0.12053333333333334, "grad_norm": 0.5445934534072876, "learning_rate": 2.4166666666666667e-06, "loss": 0.0312, "num_tokens": 24492768.0, "reward": 1.0741214752197266, "reward_std": 0.28144198656082153, "rewards/accuracy_reward_step": 0.453125, "rewards/final_brier_reward_step": 0.5308605432510376, "rewards/format_reward_step": 0.9765625, "step": 113 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 6.246828310602082e-06, "aux_brier/mean_r": 0.9656636826694012, "aux_brier/n_active_tok": 256.0, "aux_brier/n_step_records": 64.0, "aux_brier/std_r": 0.07946583925559025, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5231922734782057, "calib/avg_num_step_conf": 8.078125, "calib/ece": 0.5093227091633467, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00025797432964798535, "calib/mean_conf": 0.0046215139442231075, "calib/mu_c": 0.004496124031007753, "calib/mu_w": 0.004754098360655738, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.007152032509903317, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2850.0, "completions/max_terminated_length": 2850.0, "completions/mean_length": 427.8984375, "completions/mean_terminated_length": 429.5765075683594, "completions/min_length": 0.0, "completions/min_terminated_length": 119.0, "epoch": 0.1216, "grad_norm": 0.06990226358175278, "learning_rate": 2.388888888888889e-06, "loss": 0.0485, "num_tokens": 24707334.0, "reward": 1.114396333694458, "reward_std": 0.21713295578956604, "rewards/accuracy_reward_step": 0.50390625, "rewards/final_brier_reward_step": 0.4810226559638977, "rewards/format_reward_step": 0.98046875, "step": 114 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -1.3673805484559765e-06, "aux_brier/mean_r": 0.9552805405110121, "aux_brier/n_active_tok": 219.125, "aux_brier/n_step_records": 54.78125, "aux_brier/std_r": 0.09698930357171776, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.502511284887787, "calib/avg_num_step_conf": 6.84765625, "calib/ece": 0.41728346456692916, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00023459851230211732, "calib/mean_conf": 0.003976377952755907, "calib/mu_c": 0.00411214953271028, "calib/mu_w": 0.003877551020408163, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.008713248638791603, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2285.0, "completions/max_terminated_length": 2285.0, "completions/mean_length": 379.03125, "completions/mean_terminated_length": 379.03125, "completions/min_length": 93.0, "completions/min_terminated_length": 93.0, "epoch": 0.12266666666666666, "grad_norm": 0.2887187898159027, "learning_rate": 2.361111111111111e-06, "loss": 0.0348, "num_tokens": 24909630.0, "reward": 1.0584537982940674, "reward_std": 0.2040390968322754, "rewards/accuracy_reward_step": 0.41796875, "rewards/final_brier_reward_step": 0.5775652527809143, "rewards/format_reward_step": 0.9921875, "step": 115 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -1.4063930813490444e-05, "aux_brier/mean_r": 0.975744441151619, "aux_brier/n_active_tok": 258.625, "aux_brier/n_step_records": 64.65625, "aux_brier/std_r": 0.06502547607260567, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.4887722646310433, "calib/avg_num_step_conf": 8.0859375, "calib/ece": 0.47318725099601594, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00019083969465648852, "calib/mean_conf": 0.004900398406374502, "calib/mu_c": 0.005, "calib/mu_w": 0.004809160305343512, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.007326345506914801, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2506.0, "completions/max_terminated_length": 2506.0, "completions/mean_length": 475.84765625, "completions/mean_terminated_length": 475.84765625, "completions/min_length": 69.0, "completions/min_terminated_length": 69.0, "epoch": 0.12373333333333333, "grad_norm": 0.03878147527575493, "learning_rate": 2.3333333333333336e-06, "loss": 0.0977, "num_tokens": 25135967.0, "reward": 1.0860750675201416, "reward_std": 0.2738315761089325, "rewards/accuracy_reward_step": 0.46875, "rewards/final_brier_reward_step": 0.516175389289856, "rewards/format_reward_step": 0.9765625, "step": 116 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -1.8651252199908264e-06, "aux_brier/mean_r": 0.9706146735697985, "aux_brier/n_active_tok": 283.5, "aux_brier/n_step_records": 70.875, "aux_brier/std_r": 0.07314342059862611, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.4917989417989418, "calib/avg_num_step_conf": 8.96875, "calib/ece": 0.4309677419354839, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00029100529100529234, "calib/mean_conf": 0.004516129032258065, "calib/mu_c": 0.0043518518518518515, "calib/mu_w": 0.004642857142857144, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.006137515351754984, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2785.0, "completions/max_terminated_length": 2785.0, "completions/mean_length": 463.20703125, "completions/mean_terminated_length": 465.0235595703125, "completions/min_length": 0.0, "completions/min_terminated_length": 108.0, "epoch": 0.1248, "grad_norm": 0.19419409334659576, "learning_rate": 2.305555555555556e-06, "loss": 0.0843, "num_tokens": 25361148.0, "reward": 1.040942907333374, "reward_std": 0.2531500458717346, "rewards/accuracy_reward_step": 0.421875, "rewards/final_brier_reward_step": 0.5465843677520752, "rewards/format_reward_step": 0.96484375, "step": 117 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -1.3811674858588141e-05, "aux_brier/mean_r": 0.9684844072908163, "aux_brier/n_active_tok": 247.625, "aux_brier/n_step_records": 61.90625, "aux_brier/std_r": 0.07474729399609714, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5071497584541063, "calib/avg_num_step_conf": 7.77734375, "calib/ece": 0.4544, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0007085346215781009, "calib/mean_conf": 0.0056, "calib/mu_c": 0.0052173913043478265, "calib/mu_w": 0.005925925925925927, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.007989993742175272, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2753.0, "completions/max_terminated_length": 2753.0, "completions/mean_length": 426.01171875, "completions/mean_terminated_length": 426.01171875, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 0.12586666666666665, "grad_norm": 0.31143271923065186, "learning_rate": 2.277777777777778e-06, "loss": 0.0613, "num_tokens": 25574215.0, "reward": 1.0714612007141113, "reward_std": 0.21858364343643188, "rewards/accuracy_reward_step": 0.453125, "rewards/final_brier_reward_step": 0.5280320048332214, "rewards/format_reward_step": 0.97265625, "step": 118 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 2.580893600917511e-06, "aux_brier/mean_r": 0.9689757581800222, "aux_brier/n_active_tok": 259.0, "aux_brier/n_step_records": 64.75, "aux_brier/std_r": 0.07773740086304315, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5326171875000001, "calib/avg_num_step_conf": 8.34765625, "calib/ece": 0.477983870967742, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0008645833333333335, "calib/mean_conf": 0.005887096774193548, "calib/mu_c": 0.006333333333333333, "calib/mu_w": 0.00546875, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.007881388527389512, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2924.0, "completions/max_terminated_length": 2924.0, "completions/mean_length": 471.68359375, "completions/mean_terminated_length": 475.39764404296875, "completions/min_length": 0.0, "completions/min_terminated_length": 99.0, "epoch": 0.12693333333333334, "grad_norm": 0.2812502682209015, "learning_rate": 2.25e-06, "loss": 0.0584, "num_tokens": 25800030.0, "reward": 1.0834921598434448, "reward_std": 0.2419714480638504, "rewards/accuracy_reward_step": 0.47265625, "rewards/final_brier_reward_step": 0.5058437585830688, "rewards/format_reward_step": 0.96875, "step": 119 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -3.848374763748552e-06, "aux_brier/mean_r": 0.9654323738068342, "aux_brier/n_active_tok": 218.875, "aux_brier/n_step_records": 54.71875, "aux_brier/std_r": 0.0905265407668594, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5359330083739533, "calib/avg_num_step_conf": 6.83984375, "calib/ece": 0.49541501976284585, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.000574303212098488, "calib/mean_conf": 0.007351778656126482, "calib/mu_c": 0.0076377952755905506, "calib/mu_w": 0.0070634920634920625, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0003952569169960474, "calib/std_conf": 0.009724605461500068, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2692.0, "completions/max_terminated_length": 2692.0, "completions/mean_length": 383.2265625, "completions/mean_terminated_length": 383.2265625, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 0.128, "grad_norm": 0.2764308452606201, "learning_rate": 2.222222222222222e-06, "loss": 0.0207, "num_tokens": 26004824.0, "reward": 1.113147258758545, "reward_std": 0.2522197961807251, "rewards/accuracy_reward_step": 0.49609375, "rewards/final_brier_reward_step": 0.4994640648365021, "rewards/format_reward_step": 0.984375, "step": 120 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -5.378986479176451e-06, "aux_brier/mean_r": 0.9701242838054895, "aux_brier/n_active_tok": 246.375, "aux_brier/n_step_records": 61.59375, "aux_brier/std_r": 0.0730468185799964, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.49100323624595466, "calib/avg_num_step_conf": 7.8671875, "calib/ece": 0.40114624505928853, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00042847896440129443, "calib/mean_conf": 0.006758893280632411, "calib/mu_c": 0.0065048543689320395, "calib/mu_w": 0.006933333333333334, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0003952569169960474, "calib/std_conf": 0.009687262426538614, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1842.0, "completions/max_terminated_length": 1842.0, "completions/mean_length": 450.59375, "completions/mean_terminated_length": 452.3608093261719, "completions/min_length": 0.0, "completions/min_terminated_length": 125.0, "epoch": 0.12906666666666666, "grad_norm": 0.32100552320480347, "learning_rate": 2.1944444444444445e-06, "loss": 0.0181, "num_tokens": 26225232.0, "reward": 1.041322946548462, "reward_std": 0.28508004546165466, "rewards/accuracy_reward_step": 0.40234375, "rewards/final_brier_reward_step": 0.5871667861938477, "rewards/format_reward_step": 0.984375, "step": 121 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -2.9163343682192533e-06, "aux_brier/mean_r": 0.9640793260186911, "aux_brier/n_active_tok": 213.125, "aux_brier/n_step_records": 53.28125, "aux_brier/std_r": 0.09431882056742324, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5580948722873056, "calib/avg_num_step_conf": 6.66015625, "calib/ece": 0.50196, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0006612892900582541, "calib/mean_conf": 0.00684, "calib/mu_c": 0.007165354330708661, "calib/mu_w": 0.0065040650406504065, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0004, "calib/std_conf": 0.008989682975500304, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2510.0, "completions/max_terminated_length": 2510.0, "completions/mean_length": 404.56640625, "completions/mean_terminated_length": 406.1529541015625, "completions/min_length": 0.0, "completions/min_terminated_length": 116.0, "epoch": 0.13013333333333332, "grad_norm": 0.12830238044261932, "learning_rate": 2.166666666666667e-06, "loss": 0.089, "num_tokens": 26436145.0, "reward": 1.1033095121383667, "reward_std": 0.27963879704475403, "rewards/accuracy_reward_step": 0.49609375, "rewards/final_brier_reward_step": 0.4835507869720459, "rewards/format_reward_step": 0.97265625, "step": 122 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 1.324065981309075e-05, "aux_brier/mean_r": 0.9489955492317677, "aux_brier/n_active_tok": 243.875, "aux_brier/n_step_records": 60.96875, "aux_brier/std_r": 0.09930840979814093, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.505783805345034, "calib/avg_num_step_conf": 7.94140625, "calib/ece": 0.43582995951417003, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00039422949075920786, "calib/mean_conf": 0.006275303643724695, "calib/mu_c": 0.006055045871559634, "calib/mu_w": 0.006449275362318842, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.00040485829959514174, "calib/std_conf": 0.009170733098044765, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2570.0, "completions/max_terminated_length": 2570.0, "completions/mean_length": 477.171875, "completions/mean_terminated_length": 484.7460632324219, "completions/min_length": 0.0, "completions/min_terminated_length": 107.0, "epoch": 0.1312, "grad_norm": 0.3006885349750519, "learning_rate": 2.138888888888889e-06, "loss": 0.0007, "num_tokens": 26663589.0, "reward": 1.0354487895965576, "reward_std": 0.2609788775444031, "rewards/accuracy_reward_step": 0.42578125, "rewards/final_brier_reward_step": 0.5324199199676514, "rewards/format_reward_step": 0.953125, "step": 123 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -3.929609923902788e-06, "aux_brier/mean_r": 0.9632034786045551, "aux_brier/n_active_tok": 216.625, "aux_brier/n_step_records": 54.15625, "aux_brier/std_r": 0.08402901581484912, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5289558531746031, "calib/avg_num_step_conf": 6.76953125, "calib/ece": 0.4893700787401575, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -3.968253968254093e-05, "calib/mean_conf": 0.007480314960629922, "calib/mu_c": 0.00746031746031746, "calib/mu_w": 0.007500000000000001, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0003937007874015748, "calib/std_conf": 0.009554526810782686, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2566.0, "completions/max_terminated_length": 2566.0, "completions/mean_length": 391.19921875, "completions/mean_terminated_length": 391.19921875, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 0.13226666666666667, "grad_norm": 0.05961330235004425, "learning_rate": 2.1111111111111114e-06, "loss": 0.0141, "num_tokens": 26870552.0, "reward": 1.115080714225769, "reward_std": 0.23061586916446686, "rewards/accuracy_reward_step": 0.4921875, "rewards/final_brier_reward_step": 0.5071976184844971, "rewards/format_reward_step": 0.9921875, "step": 124 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -5.746835116005045e-06, "aux_brier/mean_r": 0.9573018476366997, "aux_brier/n_active_tok": 216.0, "aux_brier/n_step_records": 54.0, "aux_brier/std_r": 0.08736527298242436, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5488609112709832, "calib/avg_num_step_conf": 6.84765625, "calib/ece": 0.4305991902834008, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0011853184119371167, "calib/mean_conf": 0.006647773279352226, "calib/mu_c": 0.007314814814814815, "calib/mu_w": 0.006129496402877698, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.006995763980095164, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2695.0, "completions/max_terminated_length": 2695.0, "completions/mean_length": 442.65625, "completions/mean_terminated_length": 444.3921813964844, "completions/min_length": 0.0, "completions/min_terminated_length": 56.0, "epoch": 0.13333333333333333, "grad_norm": 0.09035126864910126, "learning_rate": 2.0833333333333334e-06, "loss": 0.0777, "num_tokens": 27088680.0, "reward": 1.0454658269882202, "reward_std": 0.268334299325943, "rewards/accuracy_reward_step": 0.42578125, "rewards/final_brier_reward_step": 0.5490508079528809, "rewards/format_reward_step": 0.96484375, "step": 125 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 1.3727328123991356e-05, "aux_brier/mean_r": 0.9479384887963533, "aux_brier/n_active_tok": 208.5, "aux_brier/n_step_records": 52.125, "aux_brier/std_r": 0.10931747580752926, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5127183967112025, "calib/avg_num_step_conf": 6.578125, "calib/ece": 0.44258964143426294, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.005481757451181912, "calib/mean_conf": 0.009999999999999998, "calib/mu_c": 0.0069642857142857145, "calib/mu_w": 0.012446043165467626, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0031872509960159364, "calib/std_conf": 0.050566511759797955, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2193.0, "completions/max_terminated_length": 2193.0, "completions/mean_length": 396.109375, "completions/mean_terminated_length": 397.66278076171875, "completions/min_length": 0.0, "completions/min_terminated_length": 74.0, "epoch": 0.1344, "grad_norm": 0.2300887256860733, "learning_rate": 2.0555555555555555e-06, "loss": -0.0012, "num_tokens": 27295548.0, "reward": 1.0571612119674683, "reward_std": 0.22459924221038818, "rewards/accuracy_reward_step": 0.44140625, "rewards/final_brier_reward_step": 0.5333324670791626, "rewards/format_reward_step": 0.96484375, "step": 126 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -5.327482300726549e-06, "aux_brier/mean_r": 0.9759995881468058, "aux_brier/n_active_tok": 216.75, "aux_brier/n_step_records": 54.1875, "aux_brier/std_r": 0.0617162466321588, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5138588684043229, "calib/avg_num_step_conf": 7.0078125, "calib/ece": 0.4744223107569721, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00019135410044501117, "calib/mean_conf": 0.008446215139442232, "calib/mu_c": 0.008347107438016529, "calib/mu_w": 0.00853846153846154, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.00039840637450199205, "calib/std_conf": 0.008991812102019138, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2574.0, "completions/max_terminated_length": 2574.0, "completions/mean_length": 399.9765625, "completions/mean_terminated_length": 403.1259765625, "completions/min_length": 0.0, "completions/min_terminated_length": 123.0, "epoch": 0.13546666666666668, "grad_norm": 0.0733683854341507, "learning_rate": 2.027777777777778e-06, "loss": 0.0006, "num_tokens": 27501614.0, "reward": 1.089825987815857, "reward_std": 0.21522581577301025, "rewards/accuracy_reward_step": 0.47265625, "rewards/final_brier_reward_step": 0.5155539512634277, "rewards/format_reward_step": 0.9765625, "step": 127 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 1.7014811636129057e-05, "aux_brier/mean_r": 0.9677179660648108, "aux_brier/n_active_tok": 183.25, "aux_brier/n_step_records": 45.8125, "aux_brier/std_r": 0.07544746055828, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.46982470226147466, "calib/avg_num_step_conf": 5.76953125, "calib/ece": 0.42113360323886634, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0009179713635755395, "calib/mean_conf": 0.00882591093117409, "calib/mu_c": 0.00830188679245283, "calib/mu_w": 0.00921985815602837, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.00040485829959514174, "calib/std_conf": 0.008761230888426914, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2295.0, "completions/max_terminated_length": 2295.0, "completions/mean_length": 403.94921875, "completions/mean_terminated_length": 407.1299133300781, "completions/min_length": 0.0, "completions/min_terminated_length": 108.0, "epoch": 0.13653333333333334, "grad_norm": 0.11573495715856552, "learning_rate": 2.0000000000000003e-06, "loss": -0.0051, "num_tokens": 27711689.0, "reward": 1.0300018787384033, "reward_std": 0.2851436734199524, "rewards/accuracy_reward_step": 0.4140625, "rewards/final_brier_reward_step": 0.5496948957443237, "rewards/format_reward_step": 0.95703125, "step": 128 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -1.0183846687800147e-05, "aux_brier/mean_r": 0.9540750030428171, "aux_brier/n_active_tok": 205.75, "aux_brier/n_step_records": 51.4375, "aux_brier/std_r": 0.10148344254503172, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4885369815092454, "calib/avg_num_step_conf": 6.44140625, "calib/ece": 0.5335433070866142, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0005934532733633188, "calib/mean_conf": 0.009763779527559057, "calib/mu_c": 0.009492753623188407, "calib/mu_w": 0.010086206896551726, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.008414888093176476, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1727.0, "completions/max_terminated_length": 1727.0, "completions/mean_length": 373.62890625, "completions/mean_terminated_length": 375.0941467285156, "completions/min_length": 0.0, "completions/min_terminated_length": 101.0, "epoch": 0.1376, "grad_norm": 0.045525770634412766, "learning_rate": 1.9722222222222224e-06, "loss": 0.0085, "num_tokens": 27909722.0, "reward": 1.1480300426483154, "reward_std": 0.19109825789928436, "rewards/accuracy_reward_step": 0.5390625, "rewards/final_brier_reward_step": 0.4593074321746826, "rewards/format_reward_step": 0.98828125, "step": 129 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -1.0706090846079874e-06, "aux_brier/mean_r": 0.9589158948510885, "aux_brier/n_active_tok": 207.375, "aux_brier/n_step_records": 51.84375, "aux_brier/std_r": 0.09351115918161668, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.48998103666245263, "calib/avg_num_step_conf": 6.55078125, "calib/ece": 0.544901185770751, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 9.292035398229988e-05, "calib/mean_conf": 0.008458498023715415, "calib/mu_c": 0.008499999999999999, "calib/mu_w": 0.008407079646017699, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.007250136765576311, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1725.0, "completions/max_terminated_length": 1725.0, "completions/mean_length": 372.828125, "completions/mean_terminated_length": 375.7637634277344, "completions/min_length": 0.0, "completions/min_terminated_length": 113.0, "epoch": 0.13866666666666666, "grad_norm": 0.2924541234970093, "learning_rate": 1.944444444444445e-06, "loss": -0.0377, "num_tokens": 28110454.0, "reward": 1.157567024230957, "reward_std": 0.1929994523525238, "rewards/accuracy_reward_step": 0.55078125, "rewards/final_brier_reward_step": 0.4505804777145386, "rewards/format_reward_step": 0.98828125, "step": 130 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 1.584738323442969e-05, "aux_brier/mean_r": 0.968502027913928, "aux_brier/n_active_tok": 193.125, "aux_brier/n_step_records": 48.28125, "aux_brier/std_r": 0.07519459139598439, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.533197139938713, "calib/avg_num_step_conf": 6.0625, "calib/ece": 0.34090551181102363, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00147974123255022, "calib/mean_conf": 0.009488188976377954, "calib/mu_c": 0.01044943820224719, "calib/mu_w": 0.00896969696969697, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.0072180505817366234, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2519.0, "completions/max_terminated_length": 2519.0, "completions/mean_length": 367.22265625, "completions/mean_terminated_length": 368.66278076171875, "completions/min_length": 0.0, "completions/min_terminated_length": 118.0, "epoch": 0.13973333333333332, "grad_norm": 0.36632171273231506, "learning_rate": 1.916666666666667e-06, "loss": 0.0244, "num_tokens": 28310671.0, "reward": 1.0045971870422363, "reward_std": 0.18777036666870117, "rewards/accuracy_reward_step": 0.34765625, "rewards/final_brier_reward_step": 0.6512011289596558, "rewards/format_reward_step": 0.98828125, "step": 131 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 1.2457084497236037e-05, "aux_brier/mean_r": 0.9254661910235882, "aux_brier/n_active_tok": 208.125, "aux_brier/n_step_records": 52.03125, "aux_brier/std_r": 0.1275807368617734, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.6009348807221147, "calib/avg_num_step_conf": 6.50390625, "calib/ece": 0.5520717131474103, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.002669245647969054, "calib/mean_conf": 0.009681274900398405, "calib/mu_c": 0.010851063829787235, "calib/mu_w": 0.00818181818181818, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.006490723380956479, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2999.0, "completions/max_terminated_length": 2999.0, "completions/mean_length": 421.73828125, "completions/mean_terminated_length": 425.0590515136719, "completions/min_length": 0.0, "completions/min_terminated_length": 104.0, "epoch": 0.1408, "grad_norm": 0.2212802916765213, "learning_rate": 1.888888888888889e-06, "loss": 0.0312, "num_tokens": 28524228.0, "reward": 1.149419903755188, "reward_std": 0.2338859736919403, "rewards/accuracy_reward_step": 0.55078125, "rewards/final_brier_reward_step": 0.44142967462539673, "rewards/format_reward_step": 0.9765625, "step": 132 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 1.1967071229523896e-06, "aux_brier/mean_r": 0.965488750487566, "aux_brier/n_active_tok": 236.625, "aux_brier/n_step_records": 59.15625, "aux_brier/std_r": 0.0806026617342468, "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.5054117302910632, "calib/avg_num_step_conf": 7.6015625, "calib/ece": 0.3409387755102041, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0009492467456486773, "calib/mean_conf": 0.010081632653061223, "calib/mu_c": 0.010697674418604652, "calib/mu_w": 0.009748427672955974, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.008181204778035947, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2539.0, "completions/max_terminated_length": 2539.0, "completions/mean_length": 482.453125, "completions/mean_terminated_length": 488.1739196777344, "completions/min_length": 0.0, "completions/min_terminated_length": 103.0, "epoch": 0.14186666666666667, "grad_norm": 0.11848221719264984, "learning_rate": 1.8611111111111113e-06, "loss": 0.0609, "num_tokens": 28754080.0, "reward": 0.9685534238815308, "reward_std": 0.28001537919044495, "rewards/accuracy_reward_step": 0.3359375, "rewards/final_brier_reward_step": 0.624213695526123, "rewards/format_reward_step": 0.953125, "step": 133 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 7.0240007061794785e-06, "aux_brier/mean_r": 0.9572178870439529, "aux_brier/n_active_tok": 214.0, "aux_brier/n_step_records": 53.5, "aux_brier/std_r": 0.09566936575311047, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5525096525096524, "calib/avg_num_step_conf": 6.69921875, "calib/ece": 0.4327091633466135, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0011801801801801827, "calib/mean_conf": 0.00952191235059761, "calib/mu_c": 0.010180180180180182, "calib/mu_w": 0.009, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.007012404911136828, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1823.0, "completions/max_terminated_length": 1823.0, "completions/mean_length": 452.70703125, "completions/mean_terminated_length": 454.4823913574219, "completions/min_length": 0.0, "completions/min_terminated_length": 80.0, "epoch": 0.14293333333333333, "grad_norm": 0.36530932784080505, "learning_rate": 1.8333333333333333e-06, "loss": 0.0365, "num_tokens": 28978925.0, "reward": 1.0666258335113525, "reward_std": 0.2299438863992691, "rewards/accuracy_reward_step": 0.4375, "rewards/final_brier_reward_step": 0.5555660128593445, "rewards/format_reward_step": 0.98046875, "step": 134 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -1.6307225074729814e-05, "aux_brier/mean_r": 0.978693287819624, "aux_brier/n_active_tok": 210.5, "aux_brier/n_step_records": 52.625, "aux_brier/std_r": 0.06343126370120444, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.562784090909091, "calib/avg_num_step_conf": 6.609375, "calib/ece": 0.4590873015873015, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.003968253968253968, "calib/gap": 0.015234848484848486, "calib/mean_conf": 0.017103174603174606, "calib/mu_c": 0.025083333333333336, "calib/mu_w": 0.00984848484848485, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.06801567622353039, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 1950.0, "completions/max_terminated_length": 1950.0, "completions/mean_length": 429.2890625, "completions/mean_terminated_length": 434.37945556640625, "completions/min_length": 0.0, "completions/min_terminated_length": 143.0, "epoch": 0.144, "grad_norm": 0.10206235945224762, "learning_rate": 1.8055555555555557e-06, "loss": 0.0305, "num_tokens": 29194703.0, "reward": 1.089521884918213, "reward_std": 0.3020398020744324, "rewards/accuracy_reward_step": 0.46875, "rewards/final_brier_reward_step": 0.5299628973007202, "rewards/format_reward_step": 0.9765625, "step": 135 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -2.833252504330641e-06, "aux_brier/mean_r": 0.9679927006363869, "aux_brier/n_active_tok": 218.375, "aux_brier/n_step_records": 54.59375, "aux_brier/std_r": 0.08068615557567682, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5633767058076802, "calib/avg_num_step_conf": 6.82421875, "calib/ece": 0.4468650793650793, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0006086956521739132, "calib/mean_conf": 0.01027777777777778, "calib/mu_c": 0.010608695652173913, "calib/mu_w": 0.01, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0003968253968253968, "calib/std_conf": 0.008701522297840972, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2819.0, "completions/max_terminated_length": 2819.0, "completions/mean_length": 417.5234375, "completions/mean_terminated_length": 419.1607971191406, "completions/min_length": 0.0, "completions/min_terminated_length": 92.0, "epoch": 0.14506666666666668, "grad_norm": 0.09003238379955292, "learning_rate": 1.777777777777778e-06, "loss": 0.0138, "num_tokens": 29410077.0, "reward": 1.077533483505249, "reward_std": 0.24539029598236084, "rewards/accuracy_reward_step": 0.44921875, "rewards/final_brier_reward_step": 0.5445089936256409, "rewards/format_reward_step": 0.984375, "step": 136 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -6.853463755396305e-06, "aux_brier/mean_r": 0.9560976848006248, "aux_brier/n_active_tok": 217.125, "aux_brier/n_step_records": 54.28125, "aux_brier/std_r": 0.09084317275892317, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5137765502729006, "calib/avg_num_step_conf": 7.25, "calib/ece": 0.4375403225806452, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0008726244492667859, "calib/mean_conf": 0.011653225806451614, "calib/mu_c": 0.011171171171171172, "calib/mu_w": 0.012043795620437957, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0008064516129032258, "calib/std_conf": 0.009841931651176554, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2397.0, "completions/max_terminated_length": 2397.0, "completions/mean_length": 427.7578125, "completions/mean_terminated_length": 432.8300476074219, "completions/min_length": 0.0, "completions/min_terminated_length": 109.0, "epoch": 0.14613333333333334, "grad_norm": 0.7881132364273071, "learning_rate": 1.75e-06, "loss": 0.0012, "num_tokens": 29626567.0, "reward": 1.052150845527649, "reward_std": 0.2891596257686615, "rewards/accuracy_reward_step": 0.43359375, "rewards/final_brier_reward_step": 0.5445406436920166, "rewards/format_reward_step": 0.96484375, "step": 137 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 7.754256842365526e-06, "aux_brier/mean_r": 0.972322128713131, "aux_brier/n_active_tok": 212.75, "aux_brier/n_step_records": 53.1875, "aux_brier/std_r": 0.06768090707100782, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5817049808429118, "calib/avg_num_step_conf": 6.96484375, "calib/ece": 0.526613545816733, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0027771392081736897, "calib/mean_conf": 0.011235059760956175, "calib/mu_c": 0.012518518518518519, "calib/mu_w": 0.00974137931034483, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.00830617575504633, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2803.0, "completions/max_terminated_length": 2803.0, "completions/mean_length": 420.69140625, "completions/mean_terminated_length": 425.67987060546875, "completions/min_length": 0.0, "completions/min_terminated_length": 102.0, "epoch": 0.1472, "grad_norm": 0.1462380439043045, "learning_rate": 1.7222222222222224e-06, "loss": 0.0457, "num_tokens": 29838600.0, "reward": 1.1341123580932617, "reward_std": 0.30004674196243286, "rewards/accuracy_reward_step": 0.52734375, "rewards/final_brier_reward_step": 0.4661366939544678, "rewards/format_reward_step": 0.98046875, "step": 138 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 1.6156946503587122e-05, "aux_brier/mean_r": 0.9706994574517012, "aux_brier/n_active_tok": 209.75, "aux_brier/n_step_records": 52.4375, "aux_brier/std_r": 0.07387235932833391, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5952834986015765, "calib/avg_num_step_conf": 6.5546875, "calib/ece": 0.5367063492063492, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.001994660564454614, "calib/mean_conf": 0.011706349206349206, "calib/mu_c": 0.012608695652173913, "calib/mu_w": 0.0106140350877193, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0003968253968253968, "calib/std_conf": 0.009164776827111141, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2540.0, "completions/max_terminated_length": 2540.0, "completions/mean_length": 393.4921875, "completions/mean_terminated_length": 393.4921875, "completions/min_length": 96.0, "completions/min_terminated_length": 96.0, "epoch": 0.14826666666666666, "grad_norm": 0.1997373253107071, "learning_rate": 1.6944444444444446e-06, "loss": 0.0415, "num_tokens": 30042430.0, "reward": 1.1459221839904785, "reward_std": 0.24825318157672882, "rewards/accuracy_reward_step": 0.5390625, "rewards/final_brier_reward_step": 0.4586886763572693, "rewards/format_reward_step": 0.984375, "step": 139 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -1.3413391696803778e-05, "aux_brier/mean_r": 0.9663700181990862, "aux_brier/n_active_tok": 197.5, "aux_brier/n_step_records": 49.375, "aux_brier/std_r": 0.07360042099026032, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5398064516129032, "calib/avg_num_step_conf": 6.171875, "calib/ece": 0.5959607843137255, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0012870967741935493, "calib/mean_conf": 0.011882352941176472, "calib/mu_c": 0.012387096774193548, "calib/mu_w": 0.011099999999999999, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0067726296304114145, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1919.0, "completions/max_terminated_length": 1919.0, "completions/mean_length": 420.73046875, "completions/mean_terminated_length": 420.73046875, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 0.14933333333333335, "grad_norm": 0.1204826757311821, "learning_rate": 1.6666666666666667e-06, "loss": 0.0134, "num_tokens": 30255153.0, "reward": 1.2048753499984741, "reward_std": 0.22785888612270355, "rewards/accuracy_reward_step": 0.60546875, "rewards/final_brier_reward_step": 0.4054386615753174, "rewards/format_reward_step": 0.99609375, "step": 140 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -1.2077697552403954e-05, "aux_brier/mean_r": 0.955143878236413, "aux_brier/n_active_tok": 224.375, "aux_brier/n_step_records": 56.09375, "aux_brier/std_r": 0.08634071638903151, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5357724636736821, "calib/avg_num_step_conf": 7.01171875, "calib/ece": 0.5798412698412699, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0006385612823353089, "calib/mean_conf": 0.012222222222222223, "calib/mu_c": 0.012483221476510067, "calib/mu_w": 0.011844660194174758, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0003968253968253968, "calib/std_conf": 0.009248671194893471, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2789.0, "completions/max_terminated_length": 2789.0, "completions/mean_length": 454.91796875, "completions/mean_terminated_length": 456.7019958496094, "completions/min_length": 0.0, "completions/min_terminated_length": 97.0, "epoch": 0.1504, "grad_norm": 0.08375510573387146, "learning_rate": 1.638888888888889e-06, "loss": 0.0645, "num_tokens": 30478708.0, "reward": 1.175459861755371, "reward_std": 0.2590000629425049, "rewards/accuracy_reward_step": 0.58203125, "rewards/final_brier_reward_step": 0.41277655959129333, "rewards/format_reward_step": 0.98046875, "step": 141 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 6.421746324675182e-06, "aux_brier/mean_r": 0.9625475332140923, "aux_brier/n_active_tok": 228.375, "aux_brier/n_step_records": 57.09375, "aux_brier/std_r": 0.08791654341007415, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5930387794024158, "calib/avg_num_step_conf": 7.13671875, "calib/ece": 0.46896414342629483, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0034144945963127758, "calib/mean_conf": 0.013107569721115538, "calib/mu_c": 0.014876033057851238, "calib/mu_w": 0.011461538461538462, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.00812651349496107, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2188.0, "completions/max_terminated_length": 2188.0, "completions/mean_length": 437.47265625, "completions/mean_terminated_length": 437.47265625, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 0.15146666666666667, "grad_norm": 0.04434853419661522, "learning_rate": 1.6111111111111113e-06, "loss": 0.0435, "num_tokens": 30695861.0, "reward": 1.1011135578155518, "reward_std": 0.21961593627929688, "rewards/accuracy_reward_step": 0.48046875, "rewards/final_brier_reward_step": 0.5216417908668518, "rewards/format_reward_step": 0.98046875, "step": 142 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -1.92703469698019e-06, "aux_brier/mean_r": 0.9690980389714241, "aux_brier/n_active_tok": 217.875, "aux_brier/n_step_records": 54.46875, "aux_brier/std_r": 0.08144837511781589, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.49753457991803274, "calib/avg_num_step_conf": 6.84765625, "calib/ece": 0.49828, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00041880122950819533, "calib/mean_conf": 0.013720000000000001, "calib/mu_c": 0.013515625000000003, "calib/mu_w": 0.013934426229508199, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.007332230220062651, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2348.0, "completions/max_terminated_length": 2348.0, "completions/mean_length": 438.4921875, "completions/mean_terminated_length": 440.2117919921875, "completions/min_length": 0.0, "completions/min_terminated_length": 114.0, "epoch": 0.15253333333333333, "grad_norm": 0.30172309279441833, "learning_rate": 1.5833333333333333e-06, "loss": 0.0451, "num_tokens": 30915451.0, "reward": 1.1107416152954102, "reward_std": 0.23415014147758484, "rewards/accuracy_reward_step": 0.5, "rewards/final_brier_reward_step": 0.4898417890071869, "rewards/format_reward_step": 0.9765625, "step": 143 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 1.6517664375576002e-06, "aux_brier/mean_r": 0.9828932750970125, "aux_brier/n_active_tok": 210.75, "aux_brier/n_step_records": 52.6875, "aux_brier/std_r": 0.047716316444507356, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.48301767676767676, "calib/avg_num_step_conf": 6.76171875, "calib/ece": 0.5111507936507937, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.001212121212121213, "calib/mean_conf": 0.013531746031746032, "calib/mu_c": 0.012954545454545455, "calib/mu_w": 0.014166666666666668, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0004365079365079365, "calib/std_conf": 0.009461019868458851, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2513.0, "completions/max_terminated_length": 2513.0, "completions/mean_length": 432.796875, "completions/mean_terminated_length": 434.494140625, "completions/min_length": 0.0, "completions/min_terminated_length": 63.0, "epoch": 0.1536, "grad_norm": 0.361885666847229, "learning_rate": 1.5555555555555558e-06, "loss": 0.0282, "num_tokens": 31130375.0, "reward": 1.1282727718353271, "reward_std": 0.24435147643089294, "rewards/accuracy_reward_step": 0.515625, "rewards/final_brier_reward_step": 0.48184099793434143, "rewards/format_reward_step": 0.984375, "step": 144 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 8.398806450399254e-06, "aux_brier/mean_r": 0.9492561742663383, "aux_brier/n_active_tok": 235.5, "aux_brier/n_step_records": 58.875, "aux_brier/std_r": 0.08559227903128885, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.560848553317015, "calib/avg_num_step_conf": 7.36328125, "calib/ece": 0.5400393700787401, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0029316512897759415, "calib/mean_conf": 0.015078740157480315, "calib/mu_c": 0.01638297872340426, "calib/mu_w": 0.013451327433628318, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.009830938067889889, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2815.0, "completions/max_terminated_length": 2815.0, "completions/mean_length": 420.3203125, "completions/mean_terminated_length": 421.9686584472656, "completions/min_length": 0.0, "completions/min_terminated_length": 93.0, "epoch": 0.15466666666666667, "grad_norm": 0.20340090990066528, "learning_rate": 1.527777777777778e-06, "loss": 0.0693, "num_tokens": 31340681.0, "reward": 1.1616579294204712, "reward_std": 0.27464747428894043, "rewards/accuracy_reward_step": 0.55078125, "rewards/final_brier_reward_step": 0.4591316282749176, "rewards/format_reward_step": 0.9921875, "step": 145 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -7.21773448231211e-06, "aux_brier/mean_r": 0.9842414446175098, "aux_brier/n_active_tok": 211.75, "aux_brier/n_step_records": 52.9375, "aux_brier/std_r": 0.04281771529304024, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.4759467416031045, "calib/avg_num_step_conf": 6.7109375, "calib/ece": 0.3571541501976285, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00045229492840894066, "calib/mean_conf": 0.01517786561264822, "calib/mu_c": 0.014893617021276596, "calib/mu_w": 0.015345911949685537, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0003952569169960474, "calib/std_conf": 0.00968210036000193, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2030.0, "completions/max_terminated_length": 2030.0, "completions/mean_length": 431.30859375, "completions/mean_terminated_length": 433.0000305175781, "completions/min_length": 0.0, "completions/min_terminated_length": 128.0, "epoch": 0.15573333333333333, "grad_norm": 0.09728241711854935, "learning_rate": 1.5e-06, "loss": -0.0143, "num_tokens": 31558312.0, "reward": 1.0134062767028809, "reward_std": 0.2111670821905136, "rewards/accuracy_reward_step": 0.3671875, "rewards/final_brier_reward_step": 0.6239379048347473, "rewards/format_reward_step": 0.98046875, "step": 146 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -5.727447618175674e-06, "aux_brier/mean_r": 0.9704741071909666, "aux_brier/n_active_tok": 212.5, "aux_brier/n_step_records": 53.125, "aux_brier/std_r": 0.06024122057306158, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5446589446589447, "calib/avg_num_step_conf": 6.7421875, "calib/ece": 0.429203187250996, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0011936936936936954, "calib/mean_conf": 0.01302788844621514, "calib/mu_c": 0.013693693693693696, "calib/mu_w": 0.0125, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.006768454124728317, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2138.0, "completions/max_terminated_length": 2138.0, "completions/mean_length": 428.51171875, "completions/mean_terminated_length": 430.1921691894531, "completions/min_length": 0.0, "completions/min_terminated_length": 103.0, "epoch": 0.1568, "grad_norm": 0.11274062097072601, "learning_rate": 1.4722222222222225e-06, "loss": 0.0227, "num_tokens": 31771691.0, "reward": 1.063462734222412, "reward_std": 0.21448275446891785, "rewards/accuracy_reward_step": 0.43359375, "rewards/final_brier_reward_step": 0.5585386753082275, "rewards/format_reward_step": 0.98046875, "step": 147 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -2.8187937408108432e-06, "aux_brier/mean_r": 0.9696711394935846, "aux_brier/n_active_tok": 238.625, "aux_brier/n_step_records": 59.65625, "aux_brier/std_r": 0.0703499539031327, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5179078253818084, "calib/avg_num_step_conf": 7.45703125, "calib/ece": 0.5902409638554217, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0019536423841059573, "calib/mean_conf": 0.016184738955823293, "calib/mu_c": 0.016953642384105957, "calib/mu_w": 0.015, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.009458321596447002, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3068.0, "completions/max_terminated_length": 3068.0, "completions/mean_length": 445.63671875, "completions/mean_terminated_length": 447.38433837890625, "completions/min_length": 0.0, "completions/min_terminated_length": 78.0, "epoch": 0.15786666666666666, "grad_norm": 0.16805052757263184, "learning_rate": 1.4444444444444445e-06, "loss": 0.1193, "num_tokens": 31990886.0, "reward": 1.1748169660568237, "reward_std": 0.23278938233852386, "rewards/accuracy_reward_step": 0.58984375, "rewards/final_brier_reward_step": 0.4023929834365845, "rewards/format_reward_step": 0.96875, "step": 148 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -3.8804519937318815e-06, "aux_brier/mean_r": 0.9761108998209238, "aux_brier/n_active_tok": 235.75, "aux_brier/n_step_records": 58.9375, "aux_brier/std_r": 0.07256381576689819, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.4547360949799974, "calib/avg_num_step_conf": 7.67578125, "calib/ece": 0.47815261044176705, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0010646535036778898, "calib/mean_conf": 0.015823293172690763, "calib/mu_c": 0.015284552845528456, "calib/mu_w": 0.016349206349206346, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0072964272765788036, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2142.0, "completions/max_terminated_length": 2142.0, "completions/mean_length": 473.359375, "completions/mean_terminated_length": 478.97235107421875, "completions/min_length": 0.0, "completions/min_terminated_length": 64.0, "epoch": 0.15893333333333334, "grad_norm": 0.4073260724544525, "learning_rate": 1.4166666666666667e-06, "loss": 0.0273, "num_tokens": 32216522.0, "reward": 1.0934418439865112, "reward_std": 0.26737549901008606, "rewards/accuracy_reward_step": 0.48046875, "rewards/final_brier_reward_step": 0.5065796971321106, "rewards/format_reward_step": 0.97265625, "step": 149 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 2.205518379483351e-06, "aux_brier/mean_r": 0.9565852768719196, "aux_brier/n_active_tok": 203.25, "aux_brier/n_step_records": 50.8125, "aux_brier/std_r": 0.10582352201072354, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5540957781978575, "calib/avg_num_step_conf": 6.36328125, "calib/ece": 0.5292885375494072, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0007971014492753614, "calib/mean_conf": 0.016956521739130436, "calib/mu_c": 0.017318840579710146, "calib/mu_w": 0.016521739130434785, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0003952569169960474, "calib/std_conf": 0.01054943599247888, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2997.0, "completions/max_terminated_length": 2997.0, "completions/mean_length": 384.4921875, "completions/mean_terminated_length": 386.0000305175781, "completions/min_length": 0.0, "completions/min_terminated_length": 117.0, "epoch": 0.16, "grad_norm": 0.5043721199035645, "learning_rate": 1.3888888888888892e-06, "loss": 0.0408, "num_tokens": 32419912.0, "reward": 1.1471573114395142, "reward_std": 0.24053438007831573, "rewards/accuracy_reward_step": 0.5390625, "rewards/final_brier_reward_step": 0.4636293053627014, "rewards/format_reward_step": 0.984375, "step": 150 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 4.471196241750963e-06, "aux_brier/mean_r": 0.9629783872514963, "aux_brier/n_active_tok": 240.125, "aux_brier/n_step_records": 60.03125, "aux_brier/std_r": 0.09282081335732073, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5635138670852956, "calib/avg_num_step_conf": 7.50390625, "calib/ece": 0.36917322834645666, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.002291993720565152, "calib/mean_conf": 0.016653543307086616, "calib/mu_c": 0.01806122448979592, "calib/mu_w": 0.015769230769230768, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.0, "calib/std_conf": 0.007900840135488738, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2889.0, "completions/max_terminated_length": 2889.0, "completions/mean_length": 480.09765625, "completions/mean_terminated_length": 480.09765625, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 0.16106666666666666, "grad_norm": 0.14850957691669464, "learning_rate": 1.3611111111111112e-06, "loss": 0.0615, "num_tokens": 32649841.0, "reward": 1.0258342027664185, "reward_std": 0.25838619470596313, "rewards/accuracy_reward_step": 0.3828125, "rewards/final_brier_reward_step": 0.6111491918563843, "rewards/format_reward_step": 0.98046875, "step": 151 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -3.551428808756807e-06, "aux_brier/mean_r": 0.9819429218769073, "aux_brier/n_active_tok": 229.375, "aux_brier/n_step_records": 57.34375, "aux_brier/std_r": 0.04913567129915464, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.47147235263432447, "calib/avg_num_step_conf": 7.44921875, "calib/ece": 0.41348, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0017892540427751694, "calib/mean_conf": 0.019719999999999998, "calib/mu_c": 0.018703703703703705, "calib/mu_w": 0.020492957746478874, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0006, "calib/std_conf": 0.012211535529981476, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2567.0, "completions/max_terminated_length": 2567.0, "completions/mean_length": 439.15234375, "completions/mean_terminated_length": 442.6102294921875, "completions/min_length": 0.0, "completions/min_terminated_length": 86.0, "epoch": 0.16213333333333332, "grad_norm": 0.17242205142974854, "learning_rate": 1.3333333333333334e-06, "loss": 0.0098, "num_tokens": 32867656.0, "reward": 1.0497124195098877, "reward_std": 0.28479188680648804, "rewards/accuracy_reward_step": 0.421875, "rewards/final_brier_reward_step": 0.5660374760627747, "rewards/format_reward_step": 0.97265625, "step": 152 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -2.1157876845590007e-05, "aux_brier/mean_r": 0.9536847770214081, "aux_brier/n_active_tok": 236.375, "aux_brier/n_step_records": 59.09375, "aux_brier/std_r": 0.08569288278499698, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5372507211839961, "calib/avg_num_step_conf": 7.390625, "calib/ece": 0.5109881422924901, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.001270538065972663, "calib/mean_conf": 0.01865612648221344, "calib/mu_c": 0.019253731343283582, "calib/mu_w": 0.01798319327731092, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.009397477376577758, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2575.0, "completions/max_terminated_length": 2575.0, "completions/mean_length": 463.66015625, "completions/mean_terminated_length": 463.66015625, "completions/min_length": 102.0, "completions/min_terminated_length": 102.0, "epoch": 0.1632, "grad_norm": 0.12410463392734528, "learning_rate": 1.3055555555555556e-06, "loss": 0.0518, "num_tokens": 33093673.0, "reward": 1.1337988376617432, "reward_std": 0.2022978961467743, "rewards/accuracy_reward_step": 0.5234375, "rewards/final_brier_reward_step": 0.4805077910423279, "rewards/format_reward_step": 0.98046875, "step": 153 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -9.295652751761807e-06, "aux_brier/mean_r": 0.9636547118425369, "aux_brier/n_active_tok": 204.625, "aux_brier/n_step_records": 51.15625, "aux_brier/std_r": 0.08433311288376899, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4845013477088948, "calib/avg_num_step_conf": 6.6328125, "calib/ece": 0.3998814229249012, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.001257219869079708, "calib/mean_conf": 0.019881422924901186, "calib/mu_c": 0.019150943396226413, "calib/mu_w": 0.02040816326530612, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0003952569169960474, "calib/std_conf": 0.010272270261008794, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2431.0, "completions/max_terminated_length": 2431.0, "completions/mean_length": 393.23046875, "completions/mean_terminated_length": 396.3267822265625, "completions/min_length": 0.0, "completions/min_terminated_length": 104.0, "epoch": 0.16426666666666667, "grad_norm": 0.5878264307975769, "learning_rate": 1.2777777777777779e-06, "loss": -0.0056, "num_tokens": 33298780.0, "reward": 1.053626298904419, "reward_std": 0.19714659452438354, "rewards/accuracy_reward_step": 0.4140625, "rewards/final_brier_reward_step": 0.5895054340362549, "rewards/format_reward_step": 0.984375, "step": 154 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -7.787100995915175e-06, "aux_brier/mean_r": 0.9587889201939106, "aux_brier/n_active_tok": 200.375, "aux_brier/n_step_records": 50.09375, "aux_brier/std_r": 0.0828558601288023, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5689196208284638, "calib/avg_num_step_conf": 6.40625, "calib/ece": 0.38379446640316206, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.001600441501103743, "calib/mean_conf": 0.020711462450592886, "calib/mu_c": 0.021666666666666657, "calib/mu_w": 0.020066225165562914, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0006719367588932806, "calib/std_conf": 0.013926960341131575, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2593.0, "completions/max_terminated_length": 2593.0, "completions/mean_length": 388.67578125, "completions/mean_terminated_length": 390.20001220703125, "completions/min_length": 0.0, "completions/min_terminated_length": 92.0, "epoch": 0.16533333333333333, "grad_norm": 0.29757195711135864, "learning_rate": 1.25e-06, "loss": 0.0586, "num_tokens": 33505497.0, "reward": 1.0442014932632446, "reward_std": 0.21927215158939362, "rewards/accuracy_reward_step": 0.3984375, "rewards/final_brier_reward_step": 0.6064937710762024, "rewards/format_reward_step": 0.98828125, "step": 155 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 1.2814041810971233e-05, "aux_brier/mean_r": 0.9737746305763721, "aux_brier/n_active_tok": 226.125, "aux_brier/n_step_records": 56.53125, "aux_brier/std_r": 0.06996969851707036, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.532210401891253, "calib/avg_num_step_conf": 7.0703125, "calib/ece": 0.41176706827309234, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0014302600472813273, "calib/mean_conf": 0.02196787148594377, "calib/mu_c": 0.02277777777777778, "calib/mu_w": 0.02134751773049645, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.012113211442609415, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2419.0, "completions/max_terminated_length": 2419.0, "completions/mean_length": 428.66015625, "completions/mean_terminated_length": 430.3412170410156, "completions/min_length": 0.0, "completions/min_terminated_length": 118.0, "epoch": 0.1664, "grad_norm": 0.1781948208808899, "learning_rate": 1.2222222222222223e-06, "loss": 0.0255, "num_tokens": 33719994.0, "reward": 1.049492359161377, "reward_std": 0.2631025016307831, "rewards/accuracy_reward_step": 0.42578125, "rewards/final_brier_reward_step": 0.5651570558547974, "rewards/format_reward_step": 0.96484375, "step": 156 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 3.0934366624935095e-06, "aux_brier/mean_r": 0.957715917378664, "aux_brier/n_active_tok": 234.75, "aux_brier/n_step_records": 58.6875, "aux_brier/std_r": 0.09730827118710295, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5010714060628978, "calib/avg_num_step_conf": 7.4375, "calib/ece": 0.4913095238095238, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0006258271885044402, "calib/mean_conf": 0.02146825396825397, "calib/mu_c": 0.02116279069767442, "calib/mu_w": 0.02178861788617886, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0004365079365079365, "calib/std_conf": 0.011710249552013005, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1903.0, "completions/max_terminated_length": 1903.0, "completions/mean_length": 415.35546875, "completions/mean_terminated_length": 416.9843444824219, "completions/min_length": 0.0, "completions/min_terminated_length": 90.0, "epoch": 0.16746666666666668, "grad_norm": 0.22403131425380707, "learning_rate": 1.1944444444444446e-06, "loss": 0.0208, "num_tokens": 33930053.0, "reward": 1.1213958263397217, "reward_std": 0.21231956779956818, "rewards/accuracy_reward_step": 0.50390625, "rewards/final_brier_reward_step": 0.5012081861495972, "rewards/format_reward_step": 0.984375, "step": 157 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -1.9515287186751273e-05, "aux_brier/mean_r": 0.960906183347106, "aux_brier/n_active_tok": 217.625, "aux_brier/n_step_records": 54.40625, "aux_brier/std_r": 0.07994146180408279, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5614431023521932, "calib/avg_num_step_conf": 6.84765625, "calib/ece": 0.5438339920948616, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0017552447552447586, "calib/mean_conf": 0.022173913043478263, "calib/mu_c": 0.02293706293706294, "calib/mu_w": 0.02118181818181818, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0003952569169960474, "calib/std_conf": 0.011644325124840508, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 1657.0, "completions/max_terminated_length": 1657.0, "completions/mean_length": 396.75390625, "completions/mean_terminated_length": 401.4585266113281, "completions/min_length": 0.0, "completions/min_terminated_length": 84.0, "epoch": 0.16853333333333334, "grad_norm": 0.06009389087557793, "learning_rate": 1.1666666666666668e-06, "loss": -0.0154, "num_tokens": 34136862.0, "reward": 1.164415717124939, "reward_std": 0.23946413397789001, "rewards/accuracy_reward_step": 0.55859375, "rewards/final_brier_reward_step": 0.45453786849975586, "rewards/format_reward_step": 0.984375, "step": 158 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -1.9505824506715896e-06, "aux_brier/mean_r": 0.9797055870294571, "aux_brier/n_active_tok": 218.875, "aux_brier/n_step_records": 54.71875, "aux_brier/std_r": 0.05575092099206813, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.6299656226126813, "calib/avg_num_step_conf": 6.859375, "calib/ece": 0.5053386454183267, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.004416221033868104, "calib/mean_conf": 0.02055776892430279, "calib/mu_c": 0.02265151515151515, "calib/mu_w": 0.018235294117647047, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.011522006028064186, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2336.0, "completions/max_terminated_length": 2336.0, "completions/mean_length": 414.734375, "completions/mean_terminated_length": 414.734375, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 0.1696, "grad_norm": 0.3552466630935669, "learning_rate": 1.138888888888889e-06, "loss": 0.0487, "num_tokens": 34347818.0, "reward": 1.1277740001678467, "reward_std": 0.22426848113536835, "rewards/accuracy_reward_step": 0.515625, "rewards/final_brier_reward_step": 0.48765861988067627, "rewards/format_reward_step": 0.98046875, "step": 159 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 7.188457189943875e-06, "aux_brier/mean_r": 0.9805282317101955, "aux_brier/n_active_tok": 236.0, "aux_brier/n_step_records": 59.0, "aux_brier/std_r": 0.05505332382199413, "calib/answer_extract_rate": 0.94921875, "calib/auroc": 0.5663885992353146, "calib/avg_num_step_conf": 7.7578125, "calib/ece": 0.4112396694214876, "calib/final_conf_rate": 0.9453125, "calib/format_rate": 0.9453125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0025623913799096287, "calib/mean_conf": 0.022644628099173555, "calib/mu_c": 0.024095238095238093, "calib/mu_w": 0.021532846715328464, "calib/nonempty_final_conf_rate": 0.9453125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.009897707810549475, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2789.0, "completions/max_terminated_length": 2789.0, "completions/mean_length": 451.1796875, "completions/mean_terminated_length": 456.5296630859375, "completions/min_length": 0.0, "completions/min_terminated_length": 117.0, "epoch": 0.17066666666666666, "grad_norm": 0.619683563709259, "learning_rate": 1.111111111111111e-06, "loss": 0.122, "num_tokens": 34568160.0, "reward": 1.0213985443115234, "reward_std": 0.2773513197898865, "rewards/accuracy_reward_step": 0.41015625, "rewards/final_brier_reward_step": 0.5543445348739624, "rewards/format_reward_step": 0.9453125, "step": 160 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 1.0115990320594404e-05, "aux_brier/mean_r": 0.9593694098293781, "aux_brier/n_active_tok": 219.5, "aux_brier/n_step_records": 54.875, "aux_brier/std_r": 0.0983421840282972, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.49498384056019396, "calib/avg_num_step_conf": 6.859375, "calib/ece": 0.6046031746031746, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0007837328305952025, "calib/mean_conf": 0.022380952380952383, "calib/mu_c": 0.02208860759493671, "calib/mu_w": 0.022872340425531913, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.010647942749999, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2499.0, "completions/max_terminated_length": 2499.0, "completions/mean_length": 415.52734375, "completions/mean_terminated_length": 417.1568908691406, "completions/min_length": 0.0, "completions/min_terminated_length": 106.0, "epoch": 0.17173333333333332, "grad_norm": 0.04351496696472168, "learning_rate": 1.0833333333333335e-06, "loss": 0.0421, "num_tokens": 34778455.0, "reward": 1.204907774925232, "reward_std": 0.21517422795295715, "rewards/accuracy_reward_step": 0.6171875, "rewards/final_brier_reward_step": 0.3899437487125397, "rewards/format_reward_step": 0.98046875, "step": 161 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -1.4207118001902108e-06, "aux_brier/mean_r": 0.9660868626087904, "aux_brier/n_active_tok": 204.75, "aux_brier/n_step_records": 51.1875, "aux_brier/std_r": 0.08538521652462805, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5258202290817318, "calib/avg_num_step_conf": 6.41796875, "calib/ece": 0.5762992125984252, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0005319355464958216, "calib/mean_conf": 0.02606299212598425, "calib/mu_c": 0.026274509803921566, "calib/mu_w": 0.025742574257425745, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.014480912243144033, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1344.0, "completions/max_terminated_length": 1344.0, "completions/mean_length": 381.03515625, "completions/mean_terminated_length": 384.0354309082031, "completions/min_length": 0.0, "completions/min_terminated_length": 90.0, "epoch": 0.1728, "grad_norm": 0.19418835639953613, "learning_rate": 1.0555555555555557e-06, "loss": 0.019, "num_tokens": 34980144.0, "reward": 1.1970850229263306, "reward_std": 0.2560886740684509, "rewards/accuracy_reward_step": 0.59765625, "rewards/final_brier_reward_step": 0.4211527109146118, "rewards/format_reward_step": 0.98828125, "step": 162 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -5.3165274416788755e-06, "aux_brier/mean_r": 0.9666802864521742, "aux_brier/n_active_tok": 233.5, "aux_brier/n_step_records": 58.375, "aux_brier/std_r": 0.08008781787521002, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.6051210307131702, "calib/avg_num_step_conf": 7.296875, "calib/ece": 0.42867469879518066, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.002254034357105677, "calib/mean_conf": 0.025140562248995982, "calib/mu_c": 0.02637168141592921, "calib/mu_w": 0.024117647058823532, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.01547378665936229, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2693.0, "completions/max_terminated_length": 2693.0, "completions/mean_length": 470.75390625, "completions/mean_terminated_length": 470.75390625, "completions/min_length": 98.0, "completions/min_terminated_length": 98.0, "epoch": 0.17386666666666667, "grad_norm": 0.028378527611494064, "learning_rate": 1.0277777777777777e-06, "loss": 0.1236, "num_tokens": 35205489.0, "reward": 1.070061445236206, "reward_std": 0.27414482831954956, "rewards/accuracy_reward_step": 0.4453125, "rewards/final_brier_reward_step": 0.5536835789680481, "rewards/format_reward_step": 0.97265625, "step": 163 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 1.5353006459772889e-06, "aux_brier/mean_r": 0.9791805166751146, "aux_brier/n_active_tok": 230.875, "aux_brier/n_step_records": 57.71875, "aux_brier/std_r": 0.06784880645955127, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5270600777624482, "calib/avg_num_step_conf": 7.42578125, "calib/ece": 0.44583794466403165, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0034172833312429464, "calib/mean_conf": 0.024517786561264823, "calib/mu_c": 0.026327731092436978, "calib/mu_w": 0.022910447761194032, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.01333146445473278, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2071.0, "completions/max_terminated_length": 2071.0, "completions/mean_length": 442.67578125, "completions/mean_terminated_length": 444.41180419921875, "completions/min_length": 0.0, "completions/min_terminated_length": 124.0, "epoch": 0.17493333333333333, "grad_norm": 0.3538491725921631, "learning_rate": 1.0000000000000002e-06, "loss": -0.0095, "num_tokens": 35424950.0, "reward": 1.0996766090393066, "reward_std": 0.2235369235277176, "rewards/accuracy_reward_step": 0.46875, "rewards/final_brier_reward_step": 0.54714435338974, "rewards/format_reward_step": 0.98828125, "step": 164 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 8.528439801355758e-06, "aux_brier/mean_r": 0.9771548826247454, "aux_brier/n_active_tok": 236.25, "aux_brier/n_step_records": 59.0625, "aux_brier/std_r": 0.057787248499153065, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5523090815273478, "calib/avg_num_step_conf": 7.3828125, "calib/ece": 0.3761417322834646, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.002880546955624355, "calib/mean_conf": 0.025433070866141737, "calib/mu_c": 0.02715686274509804, "calib/mu_w": 0.024276315789473684, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.014808802740305417, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1951.0, "completions/max_terminated_length": 1951.0, "completions/mean_length": 445.875, "completions/mean_terminated_length": 445.875, "completions/min_length": 97.0, "completions/min_terminated_length": 97.0, "epoch": 0.176, "grad_norm": 0.06936321407556534, "learning_rate": 9.722222222222224e-07, "loss": 0.0357, "num_tokens": 35644670.0, "reward": 1.0500786304473877, "reward_std": 0.18505971133708954, "rewards/accuracy_reward_step": 0.40234375, "rewards/final_brier_reward_step": 0.6143765449523926, "rewards/format_reward_step": 0.98828125, "step": 165 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -3.37373066239266e-06, "aux_brier/mean_r": 0.9554304387420416, "aux_brier/n_active_tok": 240.125, "aux_brier/n_step_records": 60.03125, "aux_brier/std_r": 0.09545079695089953, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5249762583095915, "calib/avg_num_step_conf": 7.53125, "calib/ece": 0.5091269841269841, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0005356125356125313, "calib/mean_conf": 0.02738095238095238, "calib/mu_c": 0.027629629629629626, "calib/mu_w": 0.027094017094017094, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0003968253968253968, "calib/std_conf": 0.01566912703216466, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1812.0, "completions/max_terminated_length": 1812.0, "completions/mean_length": 469.49609375, "completions/mean_terminated_length": 473.1929016113281, "completions/min_length": 0.0, "completions/min_terminated_length": 122.0, "epoch": 0.17706666666666668, "grad_norm": 0.38440483808517456, "learning_rate": 9.444444444444445e-07, "loss": 0.0137, "num_tokens": 35871045.0, "reward": 1.14473557472229, "reward_std": 0.23238131403923035, "rewards/accuracy_reward_step": 0.53125, "rewards/final_brier_reward_step": 0.485192209482193, "rewards/format_reward_step": 0.984375, "step": 166 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -1.6742046887729956e-06, "aux_brier/mean_r": 0.9599062111228704, "aux_brier/n_active_tok": 241.5, "aux_brier/n_step_records": 60.375, "aux_brier/std_r": 0.08096258347632102, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5623159646652157, "calib/avg_num_step_conf": 7.546875, "calib/ece": 0.5489723320158102, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0020567148892587338, "calib/mean_conf": 0.028102766798418975, "calib/mu_c": 0.028972602739726024, "calib/mu_w": 0.02691588785046729, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.018765173366512268, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2912.0, "completions/max_terminated_length": 2912.0, "completions/mean_length": 458.4296875, "completions/mean_terminated_length": 458.4296875, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 0.17813333333333334, "grad_norm": 0.03892511501908302, "learning_rate": 9.166666666666666e-07, "loss": 0.0668, "num_tokens": 36094011.0, "reward": 1.1769249439239502, "reward_std": 0.22184979915618896, "rewards/accuracy_reward_step": 0.5703125, "rewards/final_brier_reward_step": 0.44988712668418884, "rewards/format_reward_step": 0.98828125, "step": 167 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -1.026360499582768e-06, "aux_brier/mean_r": 0.9439290054142475, "aux_brier/n_active_tok": 220.5, "aux_brier/n_step_records": 55.125, "aux_brier/std_r": 0.11436785755904566, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.48118245390972664, "calib/avg_num_step_conf": 7.05078125, "calib/ece": 0.4902788844621514, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.002163382072472981, "calib/mean_conf": 0.027649402390438244, "calib/mu_c": 0.02869230769230769, "calib/mu_w": 0.02652892561983471, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.021266020200797126, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2602.0, "completions/max_terminated_length": 2602.0, "completions/mean_length": 467.21484375, "completions/mean_terminated_length": 469.0470886230469, "completions/min_length": 0.0, "completions/min_terminated_length": 108.0, "epoch": 0.1792, "grad_norm": 0.3296682834625244, "learning_rate": 8.88888888888889e-07, "loss": 0.0306, "num_tokens": 36318290.0, "reward": 1.1231977939605713, "reward_std": 0.2897258400917053, "rewards/accuracy_reward_step": 0.5078125, "rewards/final_brier_reward_step": 0.5006039142608643, "rewards/format_reward_step": 0.98046875, "step": 168 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -8.827883374085133e-06, "aux_brier/mean_r": 0.9727369025349617, "aux_brier/n_active_tok": 212.875, "aux_brier/n_step_records": 53.21875, "aux_brier/std_r": 0.05976087189083046, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4809053651266766, "calib/avg_num_step_conf": 6.65234375, "calib/ece": 0.49275590551181103, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.002891207153502235, "calib/mean_conf": 0.028661417322834646, "calib/mu_c": 0.02727272727272728, "calib/mu_w": 0.030163934426229513, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0008661417322834645, "calib/std_conf": 0.021694449550479097, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2630.0, "completions/max_terminated_length": 2630.0, "completions/mean_length": 431.203125, "completions/mean_terminated_length": 431.203125, "completions/min_length": 50.0, "completions/min_terminated_length": 50.0, "epoch": 0.18026666666666666, "grad_norm": 0.1273512840270996, "learning_rate": 8.611111111111112e-07, "loss": -0.008, "num_tokens": 36532862.0, "reward": 1.1346408128738403, "reward_std": 0.25249183177948, "rewards/accuracy_reward_step": 0.515625, "rewards/final_brier_reward_step": 0.49950075149536133, "rewards/format_reward_step": 0.98828125, "step": 169 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -5.885039529973657e-06, "aux_brier/mean_r": 0.9694046024233103, "aux_brier/n_active_tok": 240.0, "aux_brier/n_step_records": 60.0, "aux_brier/std_r": 0.06970213491331378, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5727455161168945, "calib/avg_num_step_conf": 7.5546875, "calib/ece": 0.49808300395256927, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0063589614950457785, "calib/mean_conf": 0.03156126482213439, "calib/mu_c": 0.034552238805970144, "calib/mu_w": 0.028193277310924365, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.020823775974377342, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2164.0, "completions/max_terminated_length": 2164.0, "completions/mean_length": 462.421875, "completions/mean_terminated_length": 462.421875, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 0.18133333333333335, "grad_norm": 0.06709030270576477, "learning_rate": 8.333333333333333e-07, "loss": 0.0069, "num_tokens": 36755394.0, "reward": 1.1434552669525146, "reward_std": 0.22824034094810486, "rewards/accuracy_reward_step": 0.52734375, "rewards/final_brier_reward_step": 0.49569636583328247, "rewards/format_reward_step": 0.984375, "step": 170 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -2.4412550331254224e-06, "aux_brier/mean_r": 0.9558273572474718, "aux_brier/n_active_tok": 217.0, "aux_brier/n_step_records": 54.25, "aux_brier/std_r": 0.08395456541529711, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5241477272727272, "calib/avg_num_step_conf": 6.8125, "calib/ece": 0.44829365079365074, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0004469696969697026, "calib/mean_conf": 0.028849206349206347, "calib/mu_c": 0.02908333333333334, "calib/mu_w": 0.028636363636363637, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.00047619047619047614, "calib/std_conf": 0.01872591255339563, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2421.0, "completions/max_terminated_length": 2421.0, "completions/mean_length": 422.21875, "completions/mean_terminated_length": 425.5433044433594, "completions/min_length": 0.0, "completions/min_terminated_length": 97.0, "epoch": 0.1824, "grad_norm": 0.7966756224632263, "learning_rate": 8.055555555555557e-07, "loss": 0.0142, "num_tokens": 36970378.0, "reward": 1.0963690280914307, "reward_std": 0.2611514627933502, "rewards/accuracy_reward_step": 0.46875, "rewards/final_brier_reward_step": 0.5417261719703674, "rewards/format_reward_step": 0.984375, "step": 171 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 1.3489336403438301e-05, "aux_brier/mean_r": 0.9757835790514946, "aux_brier/n_active_tok": 220.375, "aux_brier/n_step_records": 55.09375, "aux_brier/std_r": 0.068253882684985, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5696689259645464, "calib/avg_num_step_conf": 6.92578125, "calib/ece": 0.5166265060240963, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.004386730969760163, "calib/mean_conf": 0.033574297188755015, "calib/mu_c": 0.03554744525547445, "calib/mu_w": 0.031160714285714288, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.018423713929276678, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2354.0, "completions/max_terminated_length": 2354.0, "completions/mean_length": 400.8515625, "completions/mean_terminated_length": 408.836669921875, "completions/min_length": 0.0, "completions/min_terminated_length": 110.0, "epoch": 0.18346666666666667, "grad_norm": 0.22612127661705017, "learning_rate": 7.777777777777779e-07, "loss": -0.0436, "num_tokens": 37176348.0, "reward": 1.140014410018921, "reward_std": 0.28779447078704834, "rewards/accuracy_reward_step": 0.53515625, "rewards/final_brier_reward_step": 0.4741203188896179, "rewards/format_reward_step": 0.97265625, "step": 172 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 1.0915806101241543e-06, "aux_brier/mean_r": 0.9580189026892185, "aux_brier/n_active_tok": 244.75, "aux_brier/n_step_records": 61.1875, "aux_brier/std_r": 0.09464163784468838, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5082845082845083, "calib/avg_num_step_conf": 7.65234375, "calib/ece": 0.5343307086614172, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0003427203427203568, "calib/mean_conf": 0.02866141732283465, "calib/mu_c": 0.02881118881118882, "calib/mu_w": 0.028468468468468462, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.018480132116155613, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1483.0, "completions/max_terminated_length": 1483.0, "completions/mean_length": 445.12109375, "completions/mean_terminated_length": 446.86669921875, "completions/min_length": 0.0, "completions/min_terminated_length": 105.0, "epoch": 0.18453333333333333, "grad_norm": 0.04666566848754883, "learning_rate": 7.5e-07, "loss": 0.0312, "num_tokens": 37393459.0, "reward": 1.167917013168335, "reward_std": 0.2707953453063965, "rewards/accuracy_reward_step": 0.55859375, "rewards/final_brier_reward_step": 0.4607308506965637, "rewards/format_reward_step": 0.98828125, "step": 173 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 8.524761433481576e-06, "aux_brier/mean_r": 0.9664759822189808, "aux_brier/n_active_tok": 222.625, "aux_brier/n_step_records": 55.65625, "aux_brier/std_r": 0.08584990921372082, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.4966977082094974, "calib/avg_num_step_conf": 7.15625, "calib/ece": 0.3785200000000001, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.005144310151244953, "calib/mean_conf": 0.03348, "calib/mu_c": 0.03650485436893203, "calib/mu_w": 0.031360544217687074, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.030872149261105875, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2839.0, "completions/max_terminated_length": 2839.0, "completions/mean_length": 453.125, "completions/mean_terminated_length": 456.6929016113281, "completions/min_length": 0.0, "completions/min_terminated_length": 116.0, "epoch": 0.1856, "grad_norm": 0.22930726408958435, "learning_rate": 7.222222222222222e-07, "loss": 0.0131, "num_tokens": 37613691.0, "reward": 1.0410170555114746, "reward_std": 0.26726627349853516, "rewards/accuracy_reward_step": 0.40234375, "rewards/final_brier_reward_step": 0.601568341255188, "rewards/format_reward_step": 0.9765625, "step": 174 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -1.8267512261194163e-06, "aux_brier/mean_r": 0.964963274076581, "aux_brier/n_active_tok": 227.5, "aux_brier/n_step_records": 56.875, "aux_brier/std_r": 0.07272171173372044, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.6380625743683068, "calib/avg_num_step_conf": 7.3828125, "calib/ece": 0.3365520161290323, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.01085083642472177, "calib/mean_conf": 0.030383467741935483, "calib/mu_c": 0.03725274725274725, "calib/mu_w": 0.02640191082802548, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.02196027775928821, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2410.0, "completions/max_terminated_length": 2410.0, "completions/mean_length": 454.1171875, "completions/mean_terminated_length": 463.1633605957031, "completions/min_length": 0.0, "completions/min_terminated_length": 121.0, "epoch": 0.18666666666666668, "grad_norm": 0.19936507940292358, "learning_rate": 6.944444444444446e-07, "loss": -0.0336, "num_tokens": 37835769.0, "reward": 1.001284122467041, "reward_std": 0.26650482416152954, "rewards/accuracy_reward_step": 0.359375, "rewards/final_brier_reward_step": 0.6379494667053223, "rewards/format_reward_step": 0.96484375, "step": 175 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -2.324478931420293e-06, "aux_brier/mean_r": 0.9508497174829245, "aux_brier/n_active_tok": 233.375, "aux_brier/n_step_records": 58.34375, "aux_brier/std_r": 0.11982084469673282, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5259615384615386, "calib/avg_num_step_conf": 7.29296875, "calib/ece": 0.45507874015748023, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0007196029776674953, "calib/mean_conf": 0.03311023622047244, "calib/mu_c": 0.032741935483870964, "calib/mu_w": 0.03346153846153846, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0, "calib/std_conf": 0.02324492268545674, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2947.0, "completions/max_terminated_length": 2947.0, "completions/mean_length": 446.9140625, "completions/mean_terminated_length": 446.9140625, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 0.18773333333333334, "grad_norm": 0.030428344383835793, "learning_rate": 6.666666666666667e-07, "loss": 0.0346, "num_tokens": 38054243.0, "reward": 1.1078479290008545, "reward_std": 0.245470330119133, "rewards/accuracy_reward_step": 0.484375, "rewards/final_brier_reward_step": 0.5329542756080627, "rewards/format_reward_step": 0.98046875, "step": 176 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -5.162126705260395e-07, "aux_brier/mean_r": 0.9706018976867199, "aux_brier/n_active_tok": 229.625, "aux_brier/n_step_records": 57.40625, "aux_brier/std_r": 0.07250027536611015, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5492196878751501, "calib/avg_num_step_conf": 7.18359375, "calib/ece": 0.437202380952381, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.004739053516143309, "calib/mean_conf": 0.03501984126984128, "calib/mu_c": 0.03752100840336135, "calib/mu_w": 0.032781954887218044, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.02126690744691755, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2788.0, "completions/max_terminated_length": 2788.0, "completions/mean_length": 457.58203125, "completions/mean_terminated_length": 457.58203125, "completions/min_length": 107.0, "completions/min_terminated_length": 107.0, "epoch": 0.1888, "grad_norm": 0.04160970449447632, "learning_rate": 6.388888888888889e-07, "loss": 0.0476, "num_tokens": 38275216.0, "reward": 1.0952216386795044, "reward_std": 0.2408543825149536, "rewards/accuracy_reward_step": 0.46484375, "rewards/final_brier_reward_step": 0.5527616143226624, "rewards/format_reward_step": 0.984375, "step": 177 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -3.164760094126473e-06, "aux_brier/mean_r": 0.960687629878521, "aux_brier/n_active_tok": 225.625, "aux_brier/n_step_records": 56.40625, "aux_brier/std_r": 0.0972331341699828, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5222624491869919, "calib/avg_num_step_conf": 7.14453125, "calib/ece": 0.4768127490039841, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.004818343495934976, "calib/mean_conf": 0.03689243027888446, "calib/mu_c": 0.03453125, "calib/mu_w": 0.039349593495934976, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0018725099601593638, "calib/std_conf": 0.02691110462729234, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2102.0, "completions/max_terminated_length": 2102.0, "completions/mean_length": 413.02734375, "completions/mean_terminated_length": 419.5833435058594, "completions/min_length": 0.0, "completions/min_terminated_length": 109.0, "epoch": 0.18986666666666666, "grad_norm": 0.29774317145347595, "learning_rate": 6.111111111111112e-07, "loss": -0.0138, "num_tokens": 38487023.0, "reward": 1.1184732913970947, "reward_std": 0.2660229504108429, "rewards/accuracy_reward_step": 0.5, "rewards/final_brier_reward_step": 0.5129554271697998, "rewards/format_reward_step": 0.98046875, "step": 178 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -3.0702214512956516e-06, "aux_brier/mean_r": 0.9701255839318037, "aux_brier/n_active_tok": 221.875, "aux_brier/n_step_records": 55.46875, "aux_brier/std_r": 0.07886899895675015, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.6061815542947618, "calib/avg_num_step_conf": 6.93359375, "calib/ece": 0.5371887550200802, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00958503760390552, "calib/mean_conf": 0.03710843373493976, "calib/mu_c": 0.04118881118881118, "calib/mu_w": 0.03160377358490566, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0, "calib/std_conf": 0.026260822767123108, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2140.0, "completions/max_terminated_length": 2140.0, "completions/mean_length": 423.5, "completions/mean_terminated_length": 426.83465576171875, "completions/min_length": 0.0, "completions/min_terminated_length": 101.0, "epoch": 0.19093333333333334, "grad_norm": 0.3159056305885315, "learning_rate": 5.833333333333334e-07, "loss": 0.0433, "num_tokens": 38701703.0, "reward": 1.156509280204773, "reward_std": 0.27797508239746094, "rewards/accuracy_reward_step": 0.55859375, "rewards/final_brier_reward_step": 0.4541621208190918, "rewards/format_reward_step": 0.96875, "step": 179 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 1.3361079845702895e-06, "aux_brier/mean_r": 0.9436031747609377, "aux_brier/n_active_tok": 252.125, "aux_brier/n_step_records": 63.03125, "aux_brier/std_r": 0.09938720881109475, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5307985290254794, "calib/avg_num_step_conf": 7.95703125, "calib/ece": 0.5324497991967871, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.001438140267927504, "calib/mean_conf": 0.0338152610441767, "calib/mu_c": 0.033191489361702124, "calib/mu_w": 0.03462962962962963, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.023152671396695065, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2789.0, "completions/max_terminated_length": 2789.0, "completions/mean_length": 507.140625, "completions/mean_terminated_length": 511.13385009765625, "completions/min_length": 0.0, "completions/min_terminated_length": 117.0, "epoch": 0.192, "grad_norm": 0.5820158123970032, "learning_rate": 5.555555555555555e-07, "loss": 0.0618, "num_tokens": 38935387.0, "reward": 1.1483807563781738, "reward_std": 0.2538226246833801, "rewards/accuracy_reward_step": 0.55078125, "rewards/final_brier_reward_step": 0.45289766788482666, "rewards/format_reward_step": 0.96875, "step": 180 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 1.4328158706722327e-06, "aux_brier/mean_r": 0.97247757576406, "aux_brier/n_active_tok": 212.5, "aux_brier/n_step_records": 53.125, "aux_brier/std_r": 0.07103706735870219, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5437084461955183, "calib/avg_num_step_conf": 6.71875, "calib/ece": 0.4473725490196078, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0011955183452351664, "calib/mean_conf": 0.038901960784313724, "calib/mu_c": 0.03951612903225806, "calib/mu_w": 0.038320610687022895, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.022166265573809636, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1409.0, "completions/max_terminated_length": 1409.0, "completions/mean_length": 395.15234375, "completions/mean_terminated_length": 396.7019958496094, "completions/min_length": 0.0, "completions/min_terminated_length": 153.0, "epoch": 0.19306666666666666, "grad_norm": 0.5589336156845093, "learning_rate": 5.277777777777779e-07, "loss": 0.0275, "num_tokens": 39142810.0, "reward": 1.1135826110839844, "reward_std": 0.26310569047927856, "rewards/accuracy_reward_step": 0.484375, "rewards/final_brier_reward_step": 0.5402675271034241, "rewards/format_reward_step": 0.98828125, "step": 181 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -2.657386895199565e-06, "aux_brier/mean_r": 0.9651534650474787, "aux_brier/n_active_tok": 247.875, "aux_brier/n_step_records": 61.96875, "aux_brier/std_r": 0.08799552813798073, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5360433958564801, "calib/avg_num_step_conf": 7.75390625, "calib/ece": 0.53476, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.002527285798313829, "calib/mean_conf": 0.03724, "calib/mu_c": 0.038321678321678314, "calib/mu_w": 0.035794392523364485, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.02502763272864615, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2591.0, "completions/max_terminated_length": 2591.0, "completions/mean_length": 471.42578125, "completions/mean_terminated_length": 475.1377868652344, "completions/min_length": 0.0, "completions/min_terminated_length": 110.0, "epoch": 0.19413333333333332, "grad_norm": 0.15064965188503265, "learning_rate": 5.000000000000001e-07, "loss": 0.0256, "num_tokens": 39369655.0, "reward": 1.1615787744522095, "reward_std": 0.23680523037910461, "rewards/accuracy_reward_step": 0.55859375, "rewards/final_brier_reward_step": 0.45881521701812744, "rewards/format_reward_step": 0.9765625, "step": 182 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 2.4631665214713827e-06, "aux_brier/mean_r": 0.9567548129707575, "aux_brier/n_active_tok": 233.125, "aux_brier/n_step_records": 58.28125, "aux_brier/std_r": 0.08843014033300278, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.588068904144419, "calib/avg_num_step_conf": 7.32421875, "calib/ece": 0.41599206349206347, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.004031273836765832, "calib/mean_conf": 0.036388888888888894, "calib/mu_c": 0.03859649122807018, "calib/mu_w": 0.034565217391304345, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.024945487304534063, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2699.0, "completions/max_terminated_length": 2699.0, "completions/mean_length": 469.6171875, "completions/mean_terminated_length": 471.4588623046875, "completions/min_length": 0.0, "completions/min_terminated_length": 88.0, "epoch": 0.1952, "grad_norm": 0.0742117241024971, "learning_rate": 4.7222222222222226e-07, "loss": -0.0026, "num_tokens": 39596557.0, "reward": 1.0774511098861694, "reward_std": 0.2684721350669861, "rewards/accuracy_reward_step": 0.4453125, "rewards/final_brier_reward_step": 0.5676168203353882, "rewards/format_reward_step": 0.98046875, "step": 183 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 2.0632649647550627e-06, "aux_brier/mean_r": 0.9533497225493193, "aux_brier/n_active_tok": 248.875, "aux_brier/n_step_records": 62.21875, "aux_brier/std_r": 0.10593980570411077, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.43959731543624164, "calib/avg_num_step_conf": 7.77734375, "calib/ece": 0.5573809523809523, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.005367824330488047, "calib/mean_conf": 0.035952380952380944, "calib/mu_c": 0.03375838926174496, "calib/mu_w": 0.039126213592233006, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0010317460317460316, "calib/std_conf": 0.02405940305334791, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2362.0, "completions/max_terminated_length": 2362.0, "completions/mean_length": 458.21484375, "completions/mean_terminated_length": 458.21484375, "completions/min_length": 93.0, "completions/min_terminated_length": 93.0, "epoch": 0.19626666666666667, "grad_norm": 0.060613445937633514, "learning_rate": 4.444444444444445e-07, "loss": 0.0668, "num_tokens": 39819140.0, "reward": 1.181304693222046, "reward_std": 0.2435014545917511, "rewards/accuracy_reward_step": 0.58203125, "rewards/final_brier_reward_step": 0.4361562728881836, "rewards/format_reward_step": 0.98046875, "step": 184 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 4.706697218637146e-06, "aux_brier/mean_r": 0.9584454353898764, "aux_brier/n_active_tok": 269.375, "aux_brier/n_step_records": 67.34375, "aux_brier/std_r": 0.10195384194412327, "calib/answer_extract_rate": 0.95703125, "calib/auroc": 0.5559491978609625, "calib/avg_num_step_conf": 8.59765625, "calib/ece": 0.5157723577235773, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.004901069518716578, "calib/mean_conf": 0.03707317073170732, "calib/mu_c": 0.03926470588235294, "calib/mu_w": 0.03436363636363636, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.023102166417859882, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2896.0, "completions/max_terminated_length": 2896.0, "completions/mean_length": 474.97265625, "completions/mean_terminated_length": 482.5119323730469, "completions/min_length": 0.0, "completions/min_terminated_length": 100.0, "epoch": 0.19733333333333333, "grad_norm": 0.26940837502479553, "learning_rate": 4.1666666666666667e-07, "loss": 0.0332, "num_tokens": 40047653.0, "reward": 1.1203248500823975, "reward_std": 0.23569205403327942, "rewards/accuracy_reward_step": 0.53125, "rewards/final_brier_reward_step": 0.45786213874816895, "rewards/format_reward_step": 0.94921875, "step": 185 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 3.1191971120447803e-06, "aux_brier/mean_r": 0.9693714939057827, "aux_brier/n_active_tok": 236.375, "aux_brier/n_step_records": 59.09375, "aux_brier/std_r": 0.07374859453193494, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5705492424242424, "calib/avg_num_step_conf": 7.56640625, "calib/ece": 0.4878571428571428, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00770454545454545, "calib/mean_conf": 0.035952380952380944, "calib/mu_c": 0.03962121212121212, "calib/mu_w": 0.03191666666666667, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.022875736484928546, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1314.0, "completions/max_terminated_length": 1314.0, "completions/mean_length": 434.70703125, "completions/mean_terminated_length": 438.1299133300781, "completions/min_length": 0.0, "completions/min_terminated_length": 123.0, "epoch": 0.1984, "grad_norm": 0.060526859015226364, "learning_rate": 3.8888888888888895e-07, "loss": -0.0347, "num_tokens": 40263978.0, "reward": 1.138674259185791, "reward_std": 0.25158441066741943, "rewards/accuracy_reward_step": 0.51953125, "rewards/final_brier_reward_step": 0.5078219175338745, "rewards/format_reward_step": 0.984375, "step": 186 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -5.375904671978127e-06, "aux_brier/mean_r": 0.9759796224534512, "aux_brier/n_active_tok": 308.625, "aux_brier/n_step_records": 77.15625, "aux_brier/std_r": 0.073420865423941, "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.4740840336134454, "calib/avg_num_step_conf": 9.75, "calib/ece": 0.4485655737704918, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.9453125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0016006722689075603, "calib/mean_conf": 0.03913934426229509, "calib/mu_c": 0.03831932773109244, "calib/mu_w": 0.03992, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0, "calib/std_conf": 0.02709419379267311, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2784.0, "completions/max_terminated_length": 2784.0, "completions/mean_length": 513.859375, "completions/mean_terminated_length": 517.905517578125, "completions/min_length": 0.0, "completions/min_terminated_length": 126.0, "epoch": 0.19946666666666665, "grad_norm": 0.23074400424957275, "learning_rate": 3.611111111111111e-07, "loss": 0.03, "num_tokens": 40497070.0, "reward": 1.0678976774215698, "reward_std": 0.3068844676017761, "rewards/accuracy_reward_step": 0.46484375, "rewards/final_brier_reward_step": 0.5215910077095032, "rewards/format_reward_step": 0.9453125, "step": 187 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -4.906024643008822e-06, "aux_brier/mean_r": 0.9546673949807882, "aux_brier/n_active_tok": 243.625, "aux_brier/n_step_records": 60.90625, "aux_brier/std_r": 0.09887720412916678, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5427954076367388, "calib/avg_num_step_conf": 7.7265625, "calib/ece": 0.5083799999999999, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0014622033023735867, "calib/mean_conf": 0.035620000000000006, "calib/mu_c": 0.03628676470588236, "calib/mu_w": 0.03482456140350877, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.02328337604386443, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2948.0, "completions/max_terminated_length": 2948.0, "completions/mean_length": 485.06640625, "completions/mean_terminated_length": 488.8858337402344, "completions/min_length": 0.0, "completions/min_terminated_length": 83.0, "epoch": 0.20053333333333334, "grad_norm": 0.15768906474113464, "learning_rate": 3.3333333333333335e-07, "loss": 0.0424, "num_tokens": 40725319.0, "reward": 1.1371263265609741, "reward_std": 0.2551644742488861, "rewards/accuracy_reward_step": 0.53125, "rewards/final_brier_reward_step": 0.47819283604621887, "rewards/format_reward_step": 0.97265625, "step": 188 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -2.7383550271764534e-07, "aux_brier/mean_r": 0.9450946114957333, "aux_brier/n_active_tok": 226.5, "aux_brier/n_step_records": 56.625, "aux_brier/std_r": 0.09436882540285296, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5074691358024692, "calib/avg_num_step_conf": 7.078125, "calib/ece": 0.49435294117647055, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0008333333333333456, "calib/mean_conf": 0.03505882352941176, "calib/mu_c": 0.03466666666666666, "calib/mu_w": 0.035500000000000004, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.022972628013172618, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2481.0, "completions/max_terminated_length": 2481.0, "completions/mean_length": 445.1875, "completions/mean_terminated_length": 445.1875, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 0.2016, "grad_norm": 0.012832799926400185, "learning_rate": 3.055555555555556e-07, "loss": 0.0242, "num_tokens": 40947055.0, "reward": 1.1512812376022339, "reward_std": 0.19333651661872864, "rewards/accuracy_reward_step": 0.52734375, "rewards/final_brier_reward_step": 0.5035625100135803, "rewards/format_reward_step": 0.99609375, "step": 189 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -5.0287993214048754e-08, "aux_brier/mean_r": 0.962682893499732, "aux_brier/n_active_tok": 239.875, "aux_brier/n_step_records": 59.96875, "aux_brier/std_r": 0.0993830908646487, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.48689350976685575, "calib/avg_num_step_conf": 7.5078125, "calib/ece": 0.5087747035573122, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0009275362318840671, "calib/mean_conf": 0.03667984189723321, "calib/mu_c": 0.037101449275362325, "calib/mu_w": 0.03617391304347826, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.023675262451531905, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2219.0, "completions/max_terminated_length": 2219.0, "completions/mean_length": 490.45703125, "completions/mean_terminated_length": 490.45703125, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 0.20266666666666666, "grad_norm": 0.2315848469734192, "learning_rate": 2.7777777777777776e-07, "loss": 0.0011, "num_tokens": 41178220.0, "reward": 1.1521096229553223, "reward_std": 0.2590298354625702, "rewards/accuracy_reward_step": 0.5390625, "rewards/final_brier_reward_step": 0.4834386706352234, "rewards/format_reward_step": 0.984375, "step": 190 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -4.107090346316955e-06, "aux_brier/mean_r": 0.9723669160157442, "aux_brier/n_active_tok": 254.75, "aux_brier/n_step_records": 63.6875, "aux_brier/std_r": 0.06812337575138372, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.48543123543123545, "calib/avg_num_step_conf": 8.03125, "calib/ece": 0.42767068273092373, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00013597513597513045, "calib/mean_conf": 0.04220883534136547, "calib/mu_c": 0.042136752136752144, "calib/mu_w": 0.042272727272727274, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.024632653790854865, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2794.0, "completions/max_terminated_length": 2794.0, "completions/mean_length": 443.91015625, "completions/mean_terminated_length": 447.405517578125, "completions/min_length": 0.0, "completions/min_terminated_length": 117.0, "epoch": 0.20373333333333332, "grad_norm": 0.3770563006401062, "learning_rate": 2.5000000000000004e-07, "loss": 0.0205, "num_tokens": 41396029.0, "reward": 1.0783849954605103, "reward_std": 0.22541764378547668, "rewards/accuracy_reward_step": 0.45703125, "rewards/final_brier_reward_step": 0.5479148030281067, "rewards/format_reward_step": 0.96875, "step": 191 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 7.722779222729681e-06, "aux_brier/mean_r": 0.9646394904702902, "aux_brier/n_active_tok": 246.375, "aux_brier/n_step_records": 61.59375, "aux_brier/std_r": 0.09039763459486494, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.539198717948718, "calib/avg_num_step_conf": 7.93359375, "calib/ece": 0.44426, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0029038461538461527, "calib/mean_conf": 0.03574, "calib/mu_c": 0.03725, "calib/mu_w": 0.034346153846153846, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.02058038872324816, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2368.0, "completions/max_terminated_length": 2368.0, "completions/mean_length": 488.0, "completions/mean_terminated_length": 495.7460632324219, "completions/min_length": 0.0, "completions/min_terminated_length": 107.0, "epoch": 0.2048, "grad_norm": 0.7086110711097717, "learning_rate": 2.2222222222222224e-07, "loss": 0.0436, "num_tokens": 41625933.0, "reward": 1.092299461364746, "reward_std": 0.2804562449455261, "rewards/accuracy_reward_step": 0.46875, "rewards/final_brier_reward_step": 0.5410733222961426, "rewards/format_reward_step": 0.9765625, "step": 192 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -2.133473562065946e-06, "aux_brier/mean_r": 0.9748077616095543, "aux_brier/n_active_tok": 248.0, "aux_brier/n_step_records": 62.0, "aux_brier/std_r": 0.06305886774225655, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5561734915801901, "calib/avg_num_step_conf": 7.75390625, "calib/ece": 0.4431496062992126, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0015683837693407088, "calib/mean_conf": 0.03322834645669291, "calib/mu_c": 0.03404958677685951, "calib/mu_w": 0.0324812030075188, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.016924188986664788, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2200.0, "completions/max_terminated_length": 2200.0, "completions/mean_length": 466.8125, "completions/mean_terminated_length": 466.8125, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 0.20586666666666667, "grad_norm": 0.05636411905288696, "learning_rate": 1.9444444444444447e-07, "loss": 0.0485, "num_tokens": 41851149.0, "reward": 1.1034059524536133, "reward_std": 0.2727746367454529, "rewards/accuracy_reward_step": 0.47265625, "rewards/final_brier_reward_step": 0.5464363098144531, "rewards/format_reward_step": 0.98828125, "step": 193 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -1.3024469190103805e-06, "aux_brier/mean_r": 0.9504086393862963, "aux_brier/n_active_tok": 219.375, "aux_brier/n_step_records": 54.84375, "aux_brier/std_r": 0.10881438242176955, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5872093023255813, "calib/avg_num_step_conf": 6.90625, "calib/ece": 0.4669019607843137, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0075544481358434876, "calib/mean_conf": 0.03898039215686274, "calib/mu_c": 0.04271317829457364, "calib/mu_w": 0.03515873015873015, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.022763481414773155, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1324.0, "completions/max_terminated_length": 1324.0, "completions/mean_length": 401.640625, "completions/mean_terminated_length": 403.2156982421875, "completions/min_length": 0.0, "completions/min_terminated_length": 134.0, "epoch": 0.20693333333333333, "grad_norm": 0.1419486105442047, "learning_rate": 1.6666666666666668e-07, "loss": 0.0437, "num_tokens": 42059913.0, "reward": 1.1352542638778687, "reward_std": 0.20680460333824158, "rewards/accuracy_reward_step": 0.50390625, "rewards/final_brier_reward_step": 0.5332046747207642, "rewards/format_reward_step": 0.99609375, "step": 194 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 6.00365110148271e-06, "aux_brier/mean_r": 0.9609757885336876, "aux_brier/n_active_tok": 245.25, "aux_brier/n_step_records": 61.3125, "aux_brier/std_r": 0.08584168666311598, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.560943601562, "calib/avg_num_step_conf": 7.66796875, "calib/ece": 0.47116, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0051424364637347245, "calib/mean_conf": 0.036840000000000005, "calib/mu_c": 0.03937007874015749, "calib/mu_w": 0.03422764227642276, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.019473428049524302, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2937.0, "completions/max_terminated_length": 2937.0, "completions/mean_length": 471.43359375, "completions/mean_terminated_length": 475.1456604003906, "completions/min_length": 0.0, "completions/min_terminated_length": 149.0, "epoch": 0.208, "grad_norm": 0.3308982849121094, "learning_rate": 1.3888888888888888e-07, "loss": 0.0007, "num_tokens": 42286584.0, "reward": 1.1148107051849365, "reward_std": 0.23008430004119873, "rewards/accuracy_reward_step": 0.5, "rewards/final_brier_reward_step": 0.5139308571815491, "rewards/format_reward_step": 0.97265625, "step": 195 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -4.444754560795072e-07, "aux_brier/mean_r": 0.9569943472743034, "aux_brier/n_active_tok": 196.375, "aux_brier/n_step_records": 49.09375, "aux_brier/std_r": 0.09874178344489337, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5313103018985371, "calib/avg_num_step_conf": 6.14453125, "calib/ece": 0.49417322834645666, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00444942421413011, "calib/mean_conf": 0.037322834645669295, "calib/mu_c": 0.03940740740740741, "calib/mu_w": 0.0349579831932773, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.019600290006597777, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1428.0, "completions/max_terminated_length": 1428.0, "completions/mean_length": 350.66796875, "completions/mean_terminated_length": 350.66796875, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 0.20906666666666668, "grad_norm": 0.10370674729347229, "learning_rate": 1.1111111111111112e-07, "loss": 0.0105, "num_tokens": 42478899.0, "reward": 1.1535043716430664, "reward_std": 0.18233931064605713, "rewards/accuracy_reward_step": 0.53125, "rewards/final_brier_reward_step": 0.5046429634094238, "rewards/format_reward_step": 0.9921875, "step": 196 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -5.387195559730662e-06, "aux_brier/mean_r": 0.9340419303625822, "aux_brier/n_active_tok": 274.375, "aux_brier/n_step_records": 68.59375, "aux_brier/std_r": 0.13605722029751632, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.5285011958543715, "calib/avg_num_step_conf": 9.49609375, "calib/ece": 0.38975806451612904, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0030959340951368583, "calib/mean_conf": 0.03766129032258064, "calib/mu_c": 0.03943396226415094, "calib/mu_w": 0.03633802816901408, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.024531377897722625, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2811.0, "completions/max_terminated_length": 2811.0, "completions/mean_length": 492.5078125, "completions/mean_terminated_length": 498.34783935546875, "completions/min_length": 0.0, "completions/min_terminated_length": 170.0, "epoch": 0.21013333333333334, "grad_norm": 0.14062941074371338, "learning_rate": 8.333333333333334e-08, "loss": 0.0232, "num_tokens": 42710037.0, "reward": 1.0418548583984375, "reward_std": 0.29935210943222046, "rewards/accuracy_reward_step": 0.4140625, "rewards/final_brier_reward_step": 0.5814820528030396, "rewards/format_reward_step": 0.96484375, "step": 197 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 3.7679767961873445e-06, "aux_brier/mean_r": 0.9732892103493214, "aux_brier/n_active_tok": 223.25, "aux_brier/n_step_records": 55.8125, "aux_brier/std_r": 0.07842400559457019, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.535178664580073, "calib/avg_num_step_conf": 7.19921875, "calib/ece": 0.52872, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.002644757433489832, "calib/mean_conf": 0.03928, "calib/mu_c": 0.04042253521126761, "calib/mu_w": 0.03777777777777778, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.02200639906936162, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 1385.0, "completions/max_terminated_length": 1385.0, "completions/mean_length": 398.328125, "completions/mean_terminated_length": 406.2629699707031, "completions/min_length": 0.0, "completions/min_terminated_length": 129.0, "epoch": 0.2112, "grad_norm": 0.26279646158218384, "learning_rate": 5.555555555555556e-08, "loss": -0.0174, "num_tokens": 42917393.0, "reward": 1.1591534614562988, "reward_std": 0.2256597876548767, "rewards/accuracy_reward_step": 0.5546875, "rewards/final_brier_reward_step": 0.464739054441452, "rewards/format_reward_step": 0.9765625, "step": 198 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": 1.4298935586132266e-06, "aux_brier/mean_r": 0.9634437318891287, "aux_brier/n_active_tok": 239.875, "aux_brier/n_step_records": 59.96875, "aux_brier/std_r": 0.08827852003014414, "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.5067118553960659, "calib/avg_num_step_conf": 7.76171875, "calib/ece": 0.42544715447154474, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.001008771929824577, "calib/mean_conf": 0.03796747967479675, "calib/mu_c": 0.03850877192982457, "calib/mu_w": 0.03749999999999999, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.024276836616144725, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2788.0, "completions/max_terminated_length": 2788.0, "completions/mean_length": 482.98046875, "completions/mean_terminated_length": 488.70751953125, "completions/min_length": 0.0, "completions/min_terminated_length": 107.0, "epoch": 0.21226666666666666, "grad_norm": 0.10401692986488342, "learning_rate": 2.777777777777778e-08, "loss": 0.0504, "num_tokens": 43145236.0, "reward": 1.0627738237380981, "reward_std": 0.3272099494934082, "rewards/accuracy_reward_step": 0.4453125, "rewards/final_brier_reward_step": 0.5479702949523926, "rewards/format_reward_step": 0.9609375, "step": 199 }, { "aux_brier/lambda": 0.5, "aux_brier/loss": -1.3051130738461758e-05, "aux_brier/mean_r": 0.9661033805459738, "aux_brier/n_active_tok": 225.5, "aux_brier/n_step_records": 56.375, "aux_brier/std_r": 0.09719932903044537, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5135204081632653, "calib/avg_num_step_conf": 7.09375, "calib/ece": 0.5200396825396826, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0022142857142857103, "calib/mean_conf": 0.035515873015873015, "calib/mu_c": 0.0365, "calib/mu_w": 0.03428571428571429, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.018669317042704532, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2878.0, "completions/max_terminated_length": 2878.0, "completions/mean_length": 461.08203125, "completions/mean_terminated_length": 462.8902282714844, "completions/min_length": 0.0, "completions/min_terminated_length": 77.0, "epoch": 0.21333333333333335, "grad_norm": 0.2908664345741272, "learning_rate": 0.0, "loss": 0.0189, "num_tokens": 43371321.0, "reward": 1.1580218076705933, "reward_std": 0.25010883808135986, "rewards/accuracy_reward_step": 0.546875, "rewards/final_brier_reward_step": 0.4758370816707611, "rewards/format_reward_step": 0.984375, "step": 200 }, { "epoch": 0.21333333333333335, "step": 200, "total_flos": 0.0, "train_loss": 0.04196281756859662, "train_runtime": 15049.6587, "train_samples_per_second": 3.402, "train_steps_per_second": 0.013 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 43371321, "num_train_epochs": 1, "save_steps": 40, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }