{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.21333333333333335, "eval_steps": 500, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "calib/answer_extract_rate": 0.08203125, "calib/auroc": 0.6944444444444445, "calib/avg_num_step_conf": 0.3359375, "calib/ece": 0.6230769230769231, "calib/final_conf_rate": 0.05078125, "calib/format_rate": 0.04296875, "calib/frac_conf_gt_0.9": 0.7692307692307693, "calib/gap": 0.03861111111111115, "calib/mean_conf": 0.9307692307692309, "calib/mu_c": 0.9575, "calib/mu_w": 0.9188888888888889, "calib/nonempty_final_conf_rate": 0.05078125, "calib/nonempty_reasoning_rate": 0.09765625, "calib/nonempty_step_conf_rate": 0.0703125, "calib/pce": 0.6230769230769231, "calib/std_conf": 0.07965903671384378, "calib/step_conf_rate": 0.0703125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08984375, "completions/max_length": 2955.0, "completions/max_terminated_length": 2955.0, "completions/mean_length": 613.67578125, "completions/mean_terminated_length": 674.2532348632812, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0010666666666666667, "grad_norm": 0.004048487171530724, "learning_rate": 2.5000000000000004e-07, "loss": 0.0322, "num_tokens": 264685.0, "reward": 0.055236753076314926, "reward_std": 0.11281141638755798, "rewards/accuracy_reward_step": 0.015625, "rewards/final_brier_reward_step": 0.01655624993145466, "rewards/format_reward_step": 0.04296875, "rewards/stepwise_brier_reward": 0.024703249335289, "step": 1 }, { "calib/answer_extract_rate": 0.13671875, "calib/auroc": 0.5338345864661654, "calib/avg_num_step_conf": 0.55078125, "calib/ece": 0.6261538461538463, "calib/final_conf_rate": 0.1015625, "calib/format_rate": 0.08984375, "calib/frac_conf_gt_0.9": 0.7692307692307693, "calib/gap": 0.002406015037593856, "calib/mean_conf": 0.8953846153846153, "calib/mu_c": 0.897142857142857, "calib/mu_w": 0.8947368421052632, "calib/nonempty_final_conf_rate": 0.1015625, "calib/nonempty_reasoning_rate": 0.14453125, "calib/nonempty_step_conf_rate": 0.109375, "calib/pce": 0.6261538461538463, "calib/std_conf": 0.18653172073466937, "calib/step_conf_rate": 0.109375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0546875, "completions/max_length": 3001.0, "completions/max_terminated_length": 3001.0, "completions/mean_length": 646.4609375, "completions/mean_terminated_length": 683.8594970703125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0021333333333333334, "grad_norm": 0.006200637202709913, "learning_rate": 5.000000000000001e-07, "loss": 0.0643, "num_tokens": 533467.0, "reward": 0.11156807839870453, "reward_std": 0.21452845633029938, "rewards/accuracy_reward_step": 0.03125, "rewards/final_brier_reward_step": 0.02965039201080799, "rewards/format_reward_step": 0.08984375, "rewards/stepwise_brier_reward": 0.04943438619375229, "step": 2 }, { "calib/answer_extract_rate": 0.05859375, "calib/auroc": 0.55, "calib/avg_num_step_conf": 0.26171875, "calib/ece": 0.7683333333333333, "calib/final_conf_rate": 0.046875, "calib/format_rate": 0.02734375, "calib/frac_conf_gt_0.9": 0.9166666666666666, "calib/gap": 0.04800000000000004, "calib/mean_conf": 0.935, "calib/mu_c": 0.975, "calib/mu_w": 0.9269999999999999, "calib/nonempty_final_conf_rate": 0.046875, "calib/nonempty_reasoning_rate": 0.08203125, "calib/nonempty_step_conf_rate": 0.0546875, "calib/pce": 0.7683333333333333, "calib/std_conf": 0.1321299865031906, "calib/step_conf_rate": 0.0546875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09375, "completions/max_length": 3023.0, "completions/max_terminated_length": 3023.0, "completions/mean_length": 637.359375, "completions/mean_terminated_length": 703.2930908203125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0032, "grad_norm": 0.003514436539262533, "learning_rate": 7.5e-07, "loss": 0.0066, "num_tokens": 801887.0, "reward": 0.030613092705607414, "reward_std": 0.07494865357875824, "rewards/accuracy_reward_step": 0.0078125, "rewards/final_brier_reward_step": 0.005057031288743019, "rewards/format_reward_step": 0.02734375, "rewards/stepwise_brier_reward": 0.01583283767104149, "step": 3 }, { "calib/answer_extract_rate": 0.0625, "calib/auroc": 0.55, "calib/avg_num_step_conf": 0.3046875, "calib/ece": 0.7733333333333331, "calib/final_conf_rate": 0.046875, "calib/format_rate": 0.03125, "calib/frac_conf_gt_0.9": 0.9166666666666666, "calib/gap": 0.02400000000000002, "calib/mean_conf": 0.9400000000000001, "calib/mu_c": 0.96, "calib/mu_w": 0.9359999999999999, "calib/nonempty_final_conf_rate": 0.046875, "calib/nonempty_reasoning_rate": 0.078125, "calib/nonempty_step_conf_rate": 0.0625, "calib/pce": 0.7733333333333331, "calib/std_conf": 0.07291547618075786, "calib/step_conf_rate": 0.0625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09375, "completions/max_length": 3020.0, "completions/max_terminated_length": 3020.0, "completions/mean_length": 701.19921875, "completions/mean_terminated_length": 773.737060546875, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.004266666666666667, "grad_norm": 0.003428457770496607, "learning_rate": 1.0000000000000002e-06, "loss": 0.0312, "num_tokens": 1087562.0, "reward": 0.0333736427128315, "reward_std": 0.08957314491271973, "rewards/accuracy_reward_step": 0.0078125, "rewards/final_brier_reward_step": 0.00750117190182209, "rewards/format_reward_step": 0.03125, "rewards/stepwise_brier_reward": 0.0166183989495039, "step": 4 }, { "calib/answer_extract_rate": 0.06640625, "calib/avg_num_step_conf": 0.234375, "calib/ece": 0.8066666666666666, "calib/final_conf_rate": 0.03515625, "calib/format_rate": 0.02734375, "calib/frac_conf_gt_0.9": 0.5555555555555556, "calib/mean_conf": 0.8066666666666665, "calib/mu_c": NaN, "calib/mu_w": 0.8066666666666665, "calib/nonempty_final_conf_rate": 0.03515625, "calib/nonempty_reasoning_rate": 0.08203125, "calib/nonempty_step_conf_rate": 0.04296875, "calib/pce": 0.8066666666666666, "calib/std_conf": 0.2680795901717747, "calib/step_conf_rate": 0.04296875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 2977.0, "completions/max_terminated_length": 2977.0, "completions/mean_length": 679.66015625, "completions/mean_terminated_length": 737.2584838867188, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.005333333333333333, "grad_norm": 0.0035970297176390886, "learning_rate": 1.25e-06, "loss": 0.0245, "num_tokens": 1368243.0, "reward": 0.019157392904162407, "reward_std": 0.05418529361486435, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.006841015536338091, "rewards/format_reward_step": 0.02734375, "rewards/stepwise_brier_reward": 0.015101059339940548, "step": 5 }, { "calib/answer_extract_rate": 0.08203125, "calib/auroc": 0.7222222222222222, "calib/avg_num_step_conf": 0.45703125, "calib/ece": 0.7522277777777776, "calib/final_conf_rate": 0.0703125, "calib/format_rate": 0.046875, "calib/frac_conf_gt_0.9": 0.9444444444444444, "calib/gap": 0.07732666666666665, "calib/mean_conf": 0.9188944444444443, "calib/mu_c": 0.9833333333333334, "calib/mu_w": 0.9060066666666667, "calib/nonempty_final_conf_rate": 0.0703125, "calib/nonempty_reasoning_rate": 0.1171875, "calib/nonempty_step_conf_rate": 0.0859375, "calib/pce": 0.7522277777777776, "calib/std_conf": 0.2212836301427103, "calib/step_conf_rate": 0.0859375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05859375, "completions/max_length": 3023.0, "completions/max_terminated_length": 3023.0, "completions/mean_length": 647.62890625, "completions/mean_terminated_length": 687.9378051757812, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0064, "grad_norm": 0.005158636253327131, "learning_rate": 1.5e-06, "loss": 0.0173, "num_tokens": 1639988.0, "reward": 0.057144567370414734, "reward_std": 0.14538687467575073, "rewards/accuracy_reward_step": 0.015625, "rewards/final_brier_reward_step": 0.013710929080843925, "rewards/format_reward_step": 0.046875, "rewards/stepwise_brier_reward": 0.027367327362298965, "step": 6 }, { "calib/answer_extract_rate": 0.08203125, "calib/auroc": 0.33333333333333337, "calib/avg_num_step_conf": 0.359375, "calib/ece": 0.6594736842105262, "calib/final_conf_rate": 0.07421875, "calib/format_rate": 0.05859375, "calib/frac_conf_gt_0.9": 0.6842105263157895, "calib/gap": -0.032916666666666816, "calib/mean_conf": 0.8110526315789474, "calib/mu_c": 0.7833333333333332, "calib/mu_w": 0.81625, "calib/nonempty_final_conf_rate": 0.07421875, "calib/nonempty_reasoning_rate": 0.09375, "calib/nonempty_step_conf_rate": 0.07421875, "calib/pce": 0.6563157894736841, "calib/std_conf": 0.2555673716272154, "calib/step_conf_rate": 0.07421875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0703125, "completions/max_length": 3040.0, "completions/max_terminated_length": 3040.0, "completions/mean_length": 733.5, "completions/mean_terminated_length": 788.974853515625, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.007466666666666667, "grad_norm": 0.004818637389689684, "learning_rate": 1.75e-06, "loss": 0.0298, "num_tokens": 1935188.0, "reward": 0.061079807579517365, "reward_std": 0.12600858509540558, "rewards/accuracy_reward_step": 0.01171875, "rewards/final_brier_reward_step": 0.020985547453165054, "rewards/format_reward_step": 0.05859375, "rewards/stepwise_brier_reward": 0.035833682864904404, "step": 7 }, { "calib/answer_extract_rate": 0.0859375, "calib/auroc": 0.6333333333333333, "calib/avg_num_step_conf": 0.3515625, "calib/ece": 0.7822222222222222, "calib/final_conf_rate": 0.0703125, "calib/format_rate": 0.05078125, "calib/frac_conf_gt_0.9": 0.8888888888888888, "calib/gap": -0.018666666666667053, "calib/mean_conf": 0.948888888888889, "calib/mu_c": 0.9333333333333332, "calib/mu_w": 0.9520000000000003, "calib/nonempty_final_conf_rate": 0.0703125, "calib/nonempty_reasoning_rate": 0.09765625, "calib/nonempty_step_conf_rate": 0.078125, "calib/pce": 0.7822222222222222, "calib/std_conf": 0.05054029073575181, "calib/step_conf_rate": 0.078125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08984375, "completions/max_length": 2914.0, "completions/max_terminated_length": 2914.0, "completions/mean_length": 638.765625, "completions/mean_terminated_length": 701.8197631835938, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.008533333333333334, "grad_norm": 0.004740505013614893, "learning_rate": 2.0000000000000003e-06, "loss": 0.0314, "num_tokens": 2205224.0, "reward": 0.059289030730724335, "reward_std": 0.12507638335227966, "rewards/accuracy_reward_step": 0.015625, "rewards/final_brier_reward_step": 0.01568320393562317, "rewards/format_reward_step": 0.05078125, "rewards/stepwise_brier_reward": 0.026160426437854767, "step": 8 }, { "calib/answer_extract_rate": 0.08203125, "calib/auroc": 0.6666666666666666, "calib/avg_num_step_conf": 0.18359375, "calib/ece": 0.7, "calib/final_conf_rate": 0.05859375, "calib/format_rate": 0.0390625, "calib/frac_conf_gt_0.9": 0.7333333333333333, "calib/gap": 0.07500000000000018, "calib/mean_conf": 0.9, "calib/mu_c": 0.96, "calib/mu_w": 0.8849999999999998, "calib/nonempty_final_conf_rate": 0.05859375, "calib/nonempty_reasoning_rate": 0.08984375, "calib/nonempty_step_conf_rate": 0.046875, "calib/pce": 0.7, "calib/std_conf": 0.11366617790706256, "calib/step_conf_rate": 0.046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.10546875, "completions/max_length": 3001.0, "completions/max_terminated_length": 3001.0, "completions/mean_length": 577.609375, "completions/mean_terminated_length": 645.7117919921875, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0096, "grad_norm": 0.004966007545590401, "learning_rate": 2.25e-06, "loss": 0.0356, "num_tokens": 2460628.0, "reward": 0.04672419652342796, "reward_std": 0.10924308001995087, "rewards/accuracy_reward_step": 0.01171875, "rewards/final_brier_reward_step": 0.014699999243021011, "rewards/format_reward_step": 0.0390625, "rewards/stepwise_brier_reward": 0.023759275674819946, "step": 9 }, { "calib/answer_extract_rate": 0.14453125, "calib/auroc": 0.8888888888888888, "calib/avg_num_step_conf": 0.546875, "calib/ece": 0.874642857142857, "calib/final_conf_rate": 0.109375, "calib/format_rate": 0.09375, "calib/frac_conf_gt_0.9": 0.8214285714285714, "calib/gap": 0.08259259259259266, "calib/mean_conf": 0.9103571428571428, "calib/mu_c": 0.99, "calib/mu_w": 0.9074074074074073, "calib/nonempty_final_conf_rate": 0.109375, "calib/nonempty_reasoning_rate": 0.15625, "calib/nonempty_step_conf_rate": 0.109375, "calib/pce": 0.874642857142857, "calib/std_conf": 0.18690108428289667, "calib/step_conf_rate": 0.109375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0703125, "completions/max_length": 2996.0, "completions/max_terminated_length": 2996.0, "completions/mean_length": 623.09375, "completions/mean_terminated_length": 670.218505859375, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.010666666666666666, "grad_norm": 0.007296231109648943, "learning_rate": 2.5e-06, "loss": 0.0498, "num_tokens": 2726940.0, "reward": 0.07001252472400665, "reward_std": 0.1547928899526596, "rewards/accuracy_reward_step": 0.00390625, "rewards/final_brier_reward_step": 0.017438281327486038, "rewards/format_reward_step": 0.09375, "rewards/stepwise_brier_reward": 0.05167432874441147, "step": 10 }, { "calib/answer_extract_rate": 0.16796875, "calib/auroc": 0.44444444444444436, "calib/avg_num_step_conf": 0.65234375, "calib/ece": 0.6481090909090907, "calib/final_conf_rate": 0.12890625, "calib/format_rate": 0.10546875, "calib/frac_conf_gt_0.9": 0.5757575757575758, "calib/gap": -0.08830277777777784, "calib/mean_conf": 0.788109090909091, "calib/mu_c": 0.7238888888888888, "calib/mu_w": 0.8121916666666666, "calib/nonempty_final_conf_rate": 0.12890625, "calib/nonempty_reasoning_rate": 0.19921875, "calib/nonempty_step_conf_rate": 0.15234375, "calib/pce": 0.5817454545454543, "calib/std_conf": 0.2969892237241352, "calib/step_conf_rate": 0.15234375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09765625, "completions/max_length": 3010.0, "completions/max_terminated_length": 3010.0, "completions/mean_length": 615.04296875, "completions/mean_terminated_length": 681.6060791015625, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.011733333333333333, "grad_norm": 0.005505493376404047, "learning_rate": 2.7500000000000004e-06, "loss": 0.0954, "num_tokens": 2988871.0, "reward": 0.1337997019290924, "reward_std": 0.22659480571746826, "rewards/accuracy_reward_step": 0.03515625, "rewards/final_brier_reward_step": 0.047456204891204834, "rewards/format_reward_step": 0.10546875, "rewards/stepwise_brier_reward": 0.0658676028251648, "step": 11 }, { "calib/answer_extract_rate": 0.1796875, "calib/auroc": 0.53125, "calib/avg_num_step_conf": 0.68359375, "calib/ece": 0.5513888888888889, "calib/final_conf_rate": 0.140625, "calib/format_rate": 0.11328125, "calib/frac_conf_gt_0.9": 0.6388888888888888, "calib/gap": 0.07458333333333345, "calib/mean_conf": 0.8652777777777777, "calib/mu_c": 0.9150000000000001, "calib/mu_w": 0.8404166666666667, "calib/nonempty_final_conf_rate": 0.140625, "calib/nonempty_reasoning_rate": 0.21484375, "calib/nonempty_step_conf_rate": 0.16015625, "calib/pce": 0.5416666666666666, "calib/std_conf": 0.20857301028005507, "calib/step_conf_rate": 0.16015625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 3044.0, "completions/max_terminated_length": 3044.0, "completions/mean_length": 631.421875, "completions/mean_terminated_length": 673.5167236328125, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.0128, "grad_norm": 0.008510539308190346, "learning_rate": 3e-06, "loss": 0.0547, "num_tokens": 3254691.0, "reward": 0.15881484746932983, "reward_std": 0.2684420943260193, "rewards/accuracy_reward_step": 0.046875, "rewards/final_brier_reward_step": 0.05684414133429527, "rewards/format_reward_step": 0.11328125, "rewards/stepwise_brier_reward": 0.07060275226831436, "step": 12 }, { "calib/answer_extract_rate": 0.2421875, "calib/auroc": 0.36122448979591837, "calib/avg_num_step_conf": 0.90625, "calib/ece": 0.6199857142857143, "calib/final_conf_rate": 0.19140625, "calib/format_rate": 0.15234375, "calib/frac_conf_gt_0.9": 0.8163265306122449, "calib/gap": 0.040020000000000056, "calib/mean_conf": 0.8914142857142857, "calib/mu_c": 0.9199999999999999, "calib/mu_w": 0.8799799999999999, "calib/nonempty_final_conf_rate": 0.19140625, "calib/nonempty_reasoning_rate": 0.30078125, "calib/nonempty_step_conf_rate": 0.22265625, "calib/pce": 0.6128428571428571, "calib/std_conf": 0.1766206866105313, "calib/step_conf_rate": 0.22265625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03515625, "completions/max_length": 3045.0, "completions/max_terminated_length": 3045.0, "completions/mean_length": 626.4140625, "completions/mean_terminated_length": 649.2388916015625, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.013866666666666666, "grad_norm": 0.0077047646045684814, "learning_rate": 3.2500000000000002e-06, "loss": 0.0781, "num_tokens": 3519645.0, "reward": 0.2043651044368744, "reward_std": 0.37253743410110474, "rewards/accuracy_reward_step": 0.05859375, "rewards/final_brier_reward_step": 0.07053398340940475, "rewards/format_reward_step": 0.15234375, "rewards/stepwise_brier_reward": 0.09067648649215698, "step": 13 }, { "calib/answer_extract_rate": 0.296875, "calib/auroc": 0.5828804347826086, "calib/avg_num_step_conf": 1.25, "calib/ece": 0.6054903225806453, "calib/final_conf_rate": 0.2421875, "calib/format_rate": 0.21484375, "calib/frac_conf_gt_0.9": 0.7419354838709677, "calib/gap": 0.09208369565217389, "calib/mean_conf": 0.8635548387096774, "calib/mu_c": 0.931875, "calib/mu_w": 0.8397913043478261, "calib/nonempty_final_conf_rate": 0.2421875, "calib/nonempty_reasoning_rate": 0.34375, "calib/nonempty_step_conf_rate": 0.28125, "calib/pce": 0.6054903225806453, "calib/std_conf": 0.23968853019876643, "calib/step_conf_rate": 0.28125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.06640625, "completions/max_length": 3049.0, "completions/max_terminated_length": 3049.0, "completions/mean_length": 541.01171875, "completions/mean_terminated_length": 579.4937133789062, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.014933333333333333, "grad_norm": 0.00938513595610857, "learning_rate": 3.5e-06, "loss": 0.0275, "num_tokens": 3763544.0, "reward": 0.25877830386161804, "reward_std": 0.35493573546409607, "rewards/accuracy_reward_step": 0.0625, "rewards/final_brier_reward_step": 0.09642301499843597, "rewards/format_reward_step": 0.21484375, "rewards/stepwise_brier_reward": 0.1340026557445526, "step": 14 }, { "calib/answer_extract_rate": 0.43359375, "calib/auroc": 0.4211601307189543, "calib/avg_num_step_conf": 1.921875, "calib/ece": 0.7240764044943823, "calib/final_conf_rate": 0.34765625, "calib/format_rate": 0.3203125, "calib/frac_conf_gt_0.9": 0.7415730337078652, "calib/gap": -0.017873856209150274, "calib/mean_conf": 0.8809303370786516, "calib/mu_c": 0.8664705882352941, "calib/mu_w": 0.8843444444444444, "calib/nonempty_final_conf_rate": 0.34765625, "calib/nonempty_reasoning_rate": 0.45703125, "calib/nonempty_step_conf_rate": 0.3671875, "calib/pce": 0.706997752808989, "calib/std_conf": 0.2148621091155773, "calib/step_conf_rate": 0.3671875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.046875, "completions/max_length": 2795.0, "completions/max_terminated_length": 2795.0, "completions/mean_length": 501.13671875, "completions/mean_terminated_length": 525.7827758789062, "completions/min_length": 0.0, "completions/min_terminated_length": 10.0, "epoch": 0.016, "grad_norm": 0.011989172548055649, "learning_rate": 3.7500000000000005e-06, "loss": 0.0734, "num_tokens": 3999715.0, "reward": 0.33935385942459106, "reward_std": 0.4770300090312958, "rewards/accuracy_reward_step": 0.0703125, "rewards/final_brier_reward_step": 0.10058455169200897, "rewards/format_reward_step": 0.3203125, "rewards/stepwise_brier_reward": 0.19433088600635529, "step": 15 }, { "calib/answer_extract_rate": 0.46875, "calib/auroc": 0.606875, "calib/avg_num_step_conf": 2.4140625, "calib/ece": 0.6073454545454546, "calib/final_conf_rate": 0.4296875, "calib/format_rate": 0.33984375, "calib/frac_conf_gt_0.9": 0.7363636363636363, "calib/gap": 0.08548333333333324, "calib/mean_conf": 0.8651636363636362, "calib/mu_c": 0.9273333333333332, "calib/mu_w": 0.84185, "calib/nonempty_final_conf_rate": 0.4296875, "calib/nonempty_reasoning_rate": 0.5703125, "calib/nonempty_step_conf_rate": 0.46875, "calib/pce": 0.5998909090909091, "calib/std_conf": 0.24001157272373827, "calib/step_conf_rate": 0.46875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0390625, "completions/max_length": 2961.0, "completions/max_terminated_length": 2961.0, "completions/mean_length": 515.44140625, "completions/mean_terminated_length": 536.394287109375, "completions/min_length": 0.0, "completions/min_terminated_length": 7.0, "epoch": 0.017066666666666667, "grad_norm": 0.010875885374844074, "learning_rate": 4.000000000000001e-06, "loss": 0.0832, "num_tokens": 4240516.0, "reward": 0.4370579421520233, "reward_std": 0.5463830232620239, "rewards/accuracy_reward_step": 0.1171875, "rewards/final_brier_reward_step": 0.15203005075454712, "rewards/format_reward_step": 0.33984375, "rewards/stepwise_brier_reward": 0.21338918805122375, "step": 16 }, { "calib/answer_extract_rate": 0.61328125, "calib/auroc": 0.509936766034327, "calib/avg_num_step_conf": 3.4453125, "calib/ece": 0.6482816554809844, "calib/final_conf_rate": 0.58203125, "calib/format_rate": 0.52734375, "calib/frac_conf_gt_0.9": 0.697986577181208, "calib/gap": -0.05793030713640468, "calib/mean_conf": 0.86016129753915, "calib/mu_c": 0.8181715447154472, "calib/mu_w": 0.8761018518518519, "calib/nonempty_final_conf_rate": 0.58203125, "calib/nonempty_reasoning_rate": 0.70703125, "calib/nonempty_step_conf_rate": 0.64453125, "calib/pce": 0.6166375838926175, "calib/std_conf": 0.22649809088873865, "calib/step_conf_rate": 0.64453125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 3039.0, "completions/max_terminated_length": 3039.0, "completions/mean_length": 437.734375, "completions/mean_terminated_length": 448.2400207519531, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.018133333333333335, "grad_norm": 0.011370422318577766, "learning_rate": 4.25e-06, "loss": 0.0739, "num_tokens": 4456104.0, "reward": 0.6475951075553894, "reward_std": 0.6690946817398071, "rewards/accuracy_reward_step": 0.1640625, "rewards/final_brier_reward_step": 0.21312937140464783, "rewards/format_reward_step": 0.52734375, "rewards/stepwise_brier_reward": 0.3381884694099426, "step": 17 }, { "calib/answer_extract_rate": 0.66015625, "calib/auroc": 0.5611111111111111, "calib/avg_num_step_conf": 3.05859375, "calib/ece": 0.7279539393939392, "calib/final_conf_rate": 0.64453125, "calib/format_rate": 0.56640625, "calib/frac_conf_gt_0.9": 0.7515151515151515, "calib/gap": 0.053852592592592674, "calib/mean_conf": 0.9097721212121211, "calib/mu_c": 0.9538333333333334, "calib/mu_w": 0.8999807407407407, "calib/nonempty_final_conf_rate": 0.64453125, "calib/nonempty_reasoning_rate": 0.73828125, "calib/nonempty_step_conf_rate": 0.6796875, "calib/pce": 0.7279539393939392, "calib/std_conf": 0.15579792646065557, "calib/step_conf_rate": 0.6796875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2952.0, "completions/max_terminated_length": 2952.0, "completions/mean_length": 450.29296875, "completions/mean_terminated_length": 455.6324157714844, "completions/min_length": 0.0, "completions/min_terminated_length": 6.0, "epoch": 0.0192, "grad_norm": 0.010597813874483109, "learning_rate": 4.5e-06, "loss": 0.2026, "num_tokens": 4682099.0, "reward": 0.6116340756416321, "reward_std": 0.6417558193206787, "rewards/accuracy_reward_step": 0.12890625, "rewards/final_brier_reward_step": 0.1919897496700287, "rewards/format_reward_step": 0.56640625, "rewards/stepwise_brier_reward": 0.34829652309417725, "step": 18 }, { "calib/answer_extract_rate": 0.89453125, "calib/auroc": 0.5453404223896027, "calib/avg_num_step_conf": 4.3046875, "calib/ece": 0.7384040909090911, "calib/final_conf_rate": 0.859375, "calib/format_rate": 0.80078125, "calib/frac_conf_gt_0.9": 0.7454545454545455, "calib/gap": 0.018375911977551285, "calib/mean_conf": 0.8987686363636365, "calib/mu_c": 0.914054054054054, "calib/mu_w": 0.8956781420765028, "calib/nonempty_final_conf_rate": 0.859375, "calib/nonempty_reasoning_rate": 0.9609375, "calib/nonempty_step_conf_rate": 0.90625, "calib/pce": 0.7344954545454547, "calib/std_conf": 0.1694991208557371, "calib/step_conf_rate": 0.90625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2385.0, "completions/max_terminated_length": 2385.0, "completions/mean_length": 289.2265625, "completions/mean_terminated_length": 291.5039367675781, "completions/min_length": 0.0, "completions/min_terminated_length": 12.0, "epoch": 0.020266666666666665, "grad_norm": 0.01285602804273367, "learning_rate": 4.75e-06, "loss": 0.0468, "num_tokens": 4860901.0, "reward": 0.815706729888916, "reward_std": 0.6081791520118713, "rewards/accuracy_reward_step": 0.15234375, "rewards/final_brier_reward_step": 0.24730388820171356, "rewards/format_reward_step": 0.80078125, "rewards/stepwise_brier_reward": 0.49989813566207886, "step": 19 }, { "calib/answer_extract_rate": 0.9140625, "calib/auroc": 0.5055555555555555, "calib/avg_num_step_conf": 4.4140625, "calib/ece": 0.5623347639484979, "calib/final_conf_rate": 0.91015625, "calib/format_rate": 0.86328125, "calib/frac_conf_gt_0.9": 0.703862660944206, "calib/gap": 0.01590294117647051, "calib/mean_conf": 0.8831072961373391, "calib/mu_c": 0.89355, "calib/mu_w": 0.8776470588235294, "calib/nonempty_final_conf_rate": 0.91015625, "calib/nonempty_reasoning_rate": 0.96484375, "calib/nonempty_step_conf_rate": 0.94140625, "calib/pce": 0.5510472103004291, "calib/std_conf": 0.19168427095662832, "calib/step_conf_rate": 0.94140625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2143.0, "completions/max_terminated_length": 2143.0, "completions/mean_length": 277.91015625, "completions/mean_terminated_length": 277.91015625, "completions/min_length": 46.0, "completions/min_terminated_length": 46.0, "epoch": 0.021333333333333333, "grad_norm": 0.010729345493018627, "learning_rate": 5e-06, "loss": 0.0631, "num_tokens": 5036918.0, "reward": 1.1385329961776733, "reward_std": 0.6992599368095398, "rewards/accuracy_reward_step": 0.3125, "rewards/final_brier_reward_step": 0.40258094668388367, "rewards/format_reward_step": 0.86328125, "rewards/stepwise_brier_reward": 0.5499885678291321, "step": 20 }, { "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.491985049833887, "calib/avg_num_step_conf": 4.609375, "calib/ece": 0.6315103305785124, "calib/final_conf_rate": 0.9453125, "calib/format_rate": 0.890625, "calib/frac_conf_gt_0.9": 0.731404958677686, "calib/gap": -0.00557931893687702, "calib/mean_conf": 0.9006797520661156, "calib/mu_c": 0.8967142857142858, "calib/mu_w": 0.9022936046511628, "calib/nonempty_final_conf_rate": 0.9453125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.96875, "calib/pce": 0.6214669421487603, "calib/std_conf": 0.17603323585962752, "calib/step_conf_rate": 0.96875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2280.0, "completions/max_terminated_length": 2280.0, "completions/mean_length": 272.87890625, "completions/mean_terminated_length": 272.87890625, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "epoch": 0.0224, "grad_norm": 0.009412923827767372, "learning_rate": 4.9722222222222224e-06, "loss": 0.0192, "num_tokens": 5209735.0, "reward": 1.085858941078186, "reward_std": 0.7268727421760559, "rewards/accuracy_reward_step": 0.2734375, "rewards/final_brier_reward_step": 0.34340929985046387, "rewards/format_reward_step": 0.890625, "rewards/stepwise_brier_reward": 0.5781517028808594, "step": 21 }, { "calib/answer_extract_rate": 0.9453125, "calib/auroc": 0.49042674795199087, "calib/avg_num_step_conf": 4.50390625, "calib/ece": 0.6581171548117155, "calib/final_conf_rate": 0.93359375, "calib/format_rate": 0.90625, "calib/frac_conf_gt_0.9": 0.7280334728033473, "calib/gap": 0.022399504667555692, "calib/mean_conf": 0.9007949790794979, "calib/mu_c": 0.9177586206896552, "calib/mu_w": 0.8953591160220995, "calib/nonempty_final_conf_rate": 0.93359375, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.96875, "calib/pce": 0.6581171548117155, "calib/std_conf": 0.15258098822075492, "calib/step_conf_rate": 0.96875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2229.0, "completions/max_terminated_length": 2229.0, "completions/mean_length": 268.80078125, "completions/mean_terminated_length": 268.80078125, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "epoch": 0.023466666666666667, "grad_norm": 0.007968787103891373, "learning_rate": 4.944444444444445e-06, "loss": 0.0675, "num_tokens": 5380364.0, "reward": 1.0447142124176025, "reward_std": 0.7075128555297852, "rewards/accuracy_reward_step": 0.23828125, "rewards/final_brier_reward_step": 0.33695703744888306, "rewards/format_reward_step": 0.90625, "rewards/stepwise_brier_reward": 0.5997121930122375, "step": 22 }, { "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.48513756960366855, "calib/avg_num_step_conf": 4.1875, "calib/ece": 0.6086831275720165, "calib/final_conf_rate": 0.94921875, "calib/format_rate": 0.91796875, "calib/frac_conf_gt_0.9": 0.691358024691358, "calib/gap": 0.008890435637078142, "calib/mean_conf": 0.8956790123456789, "calib/mu_c": 0.9019718309859154, "calib/mu_w": 0.8930813953488372, "calib/nonempty_final_conf_rate": 0.94921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.96875, "calib/pce": 0.6060905349794239, "calib/std_conf": 0.15974726648051205, "calib/step_conf_rate": 0.96875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1482.0, "completions/max_terminated_length": 1482.0, "completions/mean_length": 253.1328125, "completions/mean_terminated_length": 253.1328125, "completions/min_length": 41.0, "completions/min_terminated_length": 41.0, "epoch": 0.024533333333333334, "grad_norm": 0.008745127357542515, "learning_rate": 4.9166666666666665e-06, "loss": 0.0485, "num_tokens": 5549102.0, "reward": 1.1360033750534058, "reward_std": 0.6448456645011902, "rewards/accuracy_reward_step": 0.2890625, "rewards/final_brier_reward_step": 0.36885470151901245, "rewards/format_reward_step": 0.91796875, "rewards/stepwise_brier_reward": 0.6048462986946106, "step": 23 }, { "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.4067348219890593, "calib/avg_num_step_conf": 4.7109375, "calib/ece": 0.6607661290322581, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.6975806451612904, "calib/gap": -0.025826383284010324, "calib/mean_conf": 0.88375, "calib/mu_c": 0.8640677966101695, "calib/mu_w": 0.8898941798941798, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.6533064516129032, "calib/std_conf": 0.18359989480108027, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1887.0, "completions/max_terminated_length": 1887.0, "completions/mean_length": 264.109375, "completions/mean_terminated_length": 264.109375, "completions/min_length": 59.0, "completions/min_terminated_length": 59.0, "epoch": 0.0256, "grad_norm": 0.008865960873663425, "learning_rate": 4.888888888888889e-06, "loss": 0.0483, "num_tokens": 5721226.0, "reward": 1.070737600326538, "reward_std": 0.50026535987854, "rewards/accuracy_reward_step": 0.23046875, "rewards/final_brier_reward_step": 0.34463945031166077, "rewards/format_reward_step": 0.953125, "rewards/stepwise_brier_reward": 0.6492486596107483, "step": 24 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.48281747837960853, "calib/avg_num_step_conf": 4.47265625, "calib/ece": 0.5816931174089068, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.6761133603238867, "calib/gap": 0.00861025641025659, "calib/mean_conf": 0.8766728744939271, "calib/mu_c": 0.8825641025641028, "calib/mu_w": 0.8739538461538462, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.5712882591093117, "calib/std_conf": 0.19493444354551928, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 544.0, "completions/max_terminated_length": 544.0, "completions/mean_length": 235.96484375, "completions/mean_terminated_length": 236.8902130126953, "completions/min_length": 0.0, "completions/min_terminated_length": 54.0, "epoch": 0.02666666666666667, "grad_norm": 0.00847149733453989, "learning_rate": 4.861111111111111e-06, "loss": 0.0065, "num_tokens": 5884857.0, "reward": 1.2149608135223389, "reward_std": 0.6195580959320068, "rewards/accuracy_reward_step": 0.3125, "rewards/final_brier_reward_step": 0.41003310680389404, "rewards/format_reward_step": 0.94921875, "rewards/stepwise_brier_reward": 0.6763729453086853, "step": 25 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4211283571677712, "calib/avg_num_step_conf": 4.70703125, "calib/ece": 0.6318273092369477, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.5622489959839357, "calib/gap": -0.044770230205790185, "calib/mean_conf": 0.8554417670682731, "calib/mu_c": 0.8216393442622949, "calib/mu_w": 0.8664095744680851, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.621144578313253, "calib/std_conf": 0.1857087952927548, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1691.0, "completions/max_terminated_length": 1691.0, "completions/mean_length": 277.8828125, "completions/mean_terminated_length": 278.9725646972656, "completions/min_length": 0.0, "completions/min_terminated_length": 28.0, "epoch": 0.027733333333333332, "grad_norm": 0.008542955853044987, "learning_rate": 4.833333333333333e-06, "loss": -0.0436, "num_tokens": 6061235.0, "reward": 1.12515127658844, "reward_std": 0.5599552392959595, "rewards/accuracy_reward_step": 0.23828125, "rewards/final_brier_reward_step": 0.3806217610836029, "rewards/format_reward_step": 0.97265625, "rewards/stepwise_brier_reward": 0.7449833154678345, "step": 26 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.47859314305016043, "calib/avg_num_step_conf": 4.5390625, "calib/ece": 0.5581027667984191, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.42292490118577075, "calib/gap": 0.0070055733828744415, "calib/mean_conf": 0.7863241106719367, "calib/mu_c": 0.7916129032258065, "calib/mu_w": 0.784607329842932, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.5496837944664033, "calib/std_conf": 0.23719941596990085, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1065.0, "completions/max_terminated_length": 1065.0, "completions/mean_length": 270.0546875, "completions/mean_terminated_length": 271.1137390136719, "completions/min_length": 0.0, "completions/min_terminated_length": 98.0, "epoch": 0.0288, "grad_norm": 0.00831306166946888, "learning_rate": 4.805555555555556e-06, "loss": -0.0214, "num_tokens": 6235585.0, "reward": 1.1712658405303955, "reward_std": 0.5169739723205566, "rewards/accuracy_reward_step": 0.24609375, "rewards/final_brier_reward_step": 0.4588109254837036, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.7887527346611023, "step": 27 }, { "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5224042637189104, "calib/avg_num_step_conf": 4.34765625, "calib/ece": 0.3866533864541833, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.3784860557768924, "calib/gap": 0.03475259902618777, "calib/mean_conf": 0.753585657370518, "calib/mu_c": 0.77421568627451, "calib/mu_w": 0.7394630872483222, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.3669322709163347, "calib/std_conf": 0.2642328813209692, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3047.0, "completions/max_terminated_length": 3047.0, "completions/mean_length": 290.796875, "completions/mean_terminated_length": 290.796875, "completions/min_length": 61.0, "completions/min_terminated_length": 61.0, "epoch": 0.029866666666666666, "grad_norm": 0.007810839917510748, "learning_rate": 4.777777777777778e-06, "loss": 0.0126, "num_tokens": 6416973.0, "reward": 1.4253153800964355, "reward_std": 0.6782314777374268, "rewards/accuracy_reward_step": 0.3984375, "rewards/final_brier_reward_step": 0.5682843923568726, "rewards/format_reward_step": 0.96484375, "rewards/stepwise_brier_reward": 0.8126649856567383, "step": 28 }, { "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.457579185520362, "calib/avg_num_step_conf": 4.65625, "calib/ece": 0.42056000000000004, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.24, "calib/gap": -0.022616354234001212, "calib/mean_conf": 0.6572, "calib/mu_c": 0.6407352941176472, "calib/mu_w": 0.6633516483516484, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.40288000000000007, "calib/std_conf": 0.269727566258994, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2290.0, "completions/max_terminated_length": 2290.0, "completions/mean_length": 343.81640625, "completions/mean_terminated_length": 343.81640625, "completions/min_length": 66.0, "completions/min_terminated_length": 66.0, "epoch": 0.030933333333333334, "grad_norm": 0.006815528497099876, "learning_rate": 4.75e-06, "loss": 0.1056, "num_tokens": 6612118.0, "reward": 1.2306079864501953, "reward_std": 0.5452619791030884, "rewards/accuracy_reward_step": 0.265625, "rewards/final_brier_reward_step": 0.5531593561172485, "rewards/format_reward_step": 0.96484375, "rewards/stepwise_brier_reward": 0.8458348512649536, "step": 29 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4404323513366067, "calib/avg_num_step_conf": 4.44921875, "calib/ece": 0.29564, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.168, "calib/gap": -0.057594108019640045, "calib/mean_conf": 0.56796, "calib/mu_c": 0.5320212765957446, "calib/mu_w": 0.5896153846153847, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.24380000000000002, "calib/std_conf": 0.28477401285931975, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2633.0, "completions/max_terminated_length": 2633.0, "completions/mean_length": 333.375, "completions/mean_terminated_length": 333.375, "completions/min_length": 60.0, "completions/min_terminated_length": 60.0, "epoch": 0.032, "grad_norm": 0.00658390112221241, "learning_rate": 4.722222222222222e-06, "loss": 0.0417, "num_tokens": 6804446.0, "reward": 1.3912838697433472, "reward_std": 0.6227531433105469, "rewards/accuracy_reward_step": 0.3671875, "rewards/final_brier_reward_step": 0.5929761528968811, "rewards/format_reward_step": 0.9609375, "rewards/stepwise_brier_reward": 0.8471591472625732, "step": 30 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4990901571546733, "calib/avg_num_step_conf": 4.70703125, "calib/ece": 0.2562549800796813, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.07171314741035857, "calib/gap": 0.005024813895781632, "calib/mean_conf": 0.47243027888446215, "calib/mu_c": 0.47615384615384615, "calib/mu_w": 0.4711290322580645, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.23486055776892434, "calib/std_conf": 0.26192984013835136, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1807.0, "completions/max_terminated_length": 1807.0, "completions/mean_length": 324.54296875, "completions/mean_terminated_length": 324.54296875, "completions/min_length": 98.0, "completions/min_terminated_length": 98.0, "epoch": 0.03306666666666667, "grad_norm": 0.006656688638031483, "learning_rate": 4.694444444444445e-06, "loss": 0.0647, "num_tokens": 6993441.0, "reward": 1.260326862335205, "reward_std": 0.46390360593795776, "rewards/accuracy_reward_step": 0.25390625, "rewards/final_brier_reward_step": 0.6808546781539917, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.8838905096054077, "step": 31 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5255952380952381, "calib/avg_num_step_conf": 4.4765625, "calib/ece": 0.18403162055335967, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.05138339920948617, "calib/gap": 0.003857843137254846, "calib/mean_conf": 0.38837944664031615, "calib/mu_c": 0.3909411764705882, "calib/mu_w": 0.38708333333333333, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.11822134387351776, "calib/std_conf": 0.24734507726159338, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2182.0, "completions/max_terminated_length": 2182.0, "completions/mean_length": 325.09375, "completions/mean_terminated_length": 325.09375, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 0.034133333333333335, "grad_norm": 0.007369569502770901, "learning_rate": 4.666666666666667e-06, "loss": -0.0273, "num_tokens": 7183369.0, "reward": 1.3937727212905884, "reward_std": 0.4453020393848419, "rewards/accuracy_reward_step": 0.33203125, "rewards/final_brier_reward_step": 0.7063257694244385, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.9000150561332703, "step": 32 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.507635052179251, "calib/avg_num_step_conf": 4.73046875, "calib/ece": 0.18476877470355732, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.043478260869565216, "calib/gap": -0.0005787292817679979, "calib/mean_conf": 0.38416403162055335, "calib/mu_c": 0.38375, "calib/mu_w": 0.384328729281768, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.14217391304347826, "calib/std_conf": 0.250309969440167, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2898.0, "completions/max_terminated_length": 2898.0, "completions/mean_length": 356.01171875, "completions/mean_terminated_length": 356.01171875, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 0.0352, "grad_norm": 0.00666170846670866, "learning_rate": 4.638888888888889e-06, "loss": 0.0823, "num_tokens": 7381380.0, "reward": 1.3077948093414307, "reward_std": 0.34341514110565186, "rewards/accuracy_reward_step": 0.28125, "rewards/final_brier_reward_step": 0.7101441621780396, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.8804100155830383, "step": 33 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5705141300646918, "calib/avg_num_step_conf": 4.48828125, "calib/ece": 0.17610236220472444, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.007874015748031496, "calib/gap": 0.047890364317330625, "calib/mean_conf": 0.3272047244094488, "calib/mu_c": 0.35831460674157306, "calib/mu_w": 0.31042424242424244, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.07645669291338583, "calib/std_conf": 0.2094378993364557, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1350.0, "completions/max_terminated_length": 1350.0, "completions/mean_length": 302.9375, "completions/mean_terminated_length": 302.9375, "completions/min_length": 71.0, "completions/min_terminated_length": 71.0, "epoch": 0.03626666666666667, "grad_norm": 0.007425271440297365, "learning_rate": 4.611111111111112e-06, "loss": 0.0247, "num_tokens": 7564044.0, "reward": 1.4313955307006836, "reward_std": 0.5110543966293335, "rewards/accuracy_reward_step": 0.34765625, "rewards/final_brier_reward_step": 0.7439238429069519, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9113458395004272, "step": 34 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5492224283064632, "calib/avg_num_step_conf": 4.828125, "calib/ece": 0.15236947791164657, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.012048192771084338, "calib/gap": 0.025039235268939974, "calib/mean_conf": 0.2701204819277108, "calib/mu_c": 0.2865116279069768, "calib/mu_w": 0.26147239263803684, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.038554216867469876, "calib/std_conf": 0.2011981818398749, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2567.0, "completions/max_terminated_length": 2567.0, "completions/mean_length": 390.05078125, "completions/mean_terminated_length": 391.5804138183594, "completions/min_length": 0.0, "completions/min_terminated_length": 117.0, "epoch": 0.037333333333333336, "grad_norm": 0.005499858409166336, "learning_rate": 4.583333333333333e-06, "loss": 0.1043, "num_tokens": 7773153.0, "reward": 1.3877594470977783, "reward_std": 0.5350881814956665, "rewards/accuracy_reward_step": 0.33984375, "rewards/final_brier_reward_step": 0.7160624861717224, "rewards/format_reward_step": 0.96484375, "rewards/stepwise_brier_reward": 0.8662256002426147, "step": 35 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4528992248062016, "calib/avg_num_step_conf": 4.6875, "calib/ece": 0.32232283464566924, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.003937007874015748, "calib/gap": -0.03354976744186047, "calib/mean_conf": 0.2274409448818898, "calib/mu_c": 0.21093023255813953, "calib/mu_w": 0.24448, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.020944881889763782, "calib/std_conf": 0.18583114651863405, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2372.0, "completions/max_terminated_length": 2372.0, "completions/mean_length": 345.890625, "completions/mean_terminated_length": 345.890625, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 0.0384, "grad_norm": 0.0065825642086565495, "learning_rate": 4.555555555555556e-06, "loss": 0.0394, "num_tokens": 7964413.0, "reward": 1.6298774480819702, "reward_std": 0.4798555374145508, "rewards/accuracy_reward_step": 0.50390625, "rewards/final_brier_reward_step": 0.615270733833313, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8964267373085022, "step": 36 }, { "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.43758962103106863, "calib/avg_num_step_conf": 4.3046875, "calib/ece": 0.28337398373983747, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.0040650406504065045, "calib/gap": -0.02138818709457152, "calib/mean_conf": 0.1728048780487805, "calib/mu_c": 0.1601980198019802, "calib/mu_w": 0.18158620689655172, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.022804878048780487, "calib/std_conf": 0.16382328167412918, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2505.0, "completions/max_terminated_length": 2505.0, "completions/mean_length": 387.6953125, "completions/mean_terminated_length": 389.2156982421875, "completions/min_length": 0.0, "completions/min_terminated_length": 113.0, "epoch": 0.039466666666666664, "grad_norm": 0.006749349180608988, "learning_rate": 4.527777777777778e-06, "loss": 0.135, "num_tokens": 8170759.0, "reward": 1.4470248222351074, "reward_std": 0.43770620226860046, "rewards/accuracy_reward_step": 0.39453125, "rewards/final_brier_reward_step": 0.6383277177810669, "rewards/format_reward_step": 0.9609375, "rewards/stepwise_brier_reward": 0.8607094287872314, "step": 37 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.41320792609452406, "calib/avg_num_step_conf": 4.125, "calib/ece": 0.3131474103585657, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.05134020618556702, "calib/mean_conf": 0.1401593625498008, "calib/mu_c": 0.10865979381443298, "calib/mu_w": 0.16, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.033426294820717135, "calib/std_conf": 0.132742953617327, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2396.0, "completions/max_terminated_length": 2396.0, "completions/mean_length": 392.48828125, "completions/mean_terminated_length": 392.48828125, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 0.04053333333333333, "grad_norm": 0.006610847543925047, "learning_rate": 4.5e-06, "loss": 0.1519, "num_tokens": 8378124.0, "reward": 1.4403471946716309, "reward_std": 0.4136509895324707, "rewards/accuracy_reward_step": 0.37890625, "rewards/final_brier_reward_step": 0.6473687291145325, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.8796446919441223, "step": 38 }, { "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.45158783783783785, "calib/avg_num_step_conf": 4.17578125, "calib/ece": 0.3109697580645162, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.004032258064516129, "calib/gap": -0.03289824324324324, "calib/mean_conf": 0.13335282258064515, "calib/mu_c": 0.11372, "calib/mu_w": 0.14661824324324324, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.020548387096774194, "calib/std_conf": 0.15494879979060558, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2362.0, "completions/max_terminated_length": 2362.0, "completions/mean_length": 410.68359375, "completions/mean_terminated_length": 410.68359375, "completions/min_length": 107.0, "completions/min_terminated_length": 107.0, "epoch": 0.0416, "grad_norm": 0.006809773854911327, "learning_rate": 4.472222222222223e-06, "loss": 0.147, "num_tokens": 8589347.0, "reward": 1.4385499954223633, "reward_std": 0.4814203977584839, "rewards/accuracy_reward_step": 0.390625, "rewards/final_brier_reward_step": 0.6225799322128296, "rewards/format_reward_step": 0.96484375, "rewards/stepwise_brier_reward": 0.8581823706626892, "step": 39 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.526044761338879, "calib/avg_num_step_conf": 4.35546875, "calib/ece": 0.28428571428571425, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0007724301841949277, "calib/mean_conf": 0.1364285714285714, "calib/mu_c": 0.13595959595959595, "calib/mu_w": 0.13673202614379087, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.013928571428571427, "calib/std_conf": 0.14631431856996807, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2341.0, "completions/max_terminated_length": 2341.0, "completions/mean_length": 407.9609375, "completions/mean_terminated_length": 407.9609375, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 0.042666666666666665, "grad_norm": 0.006085592322051525, "learning_rate": 4.444444444444444e-06, "loss": 0.1084, "num_tokens": 8800545.0, "reward": 1.454925537109375, "reward_std": 0.43846869468688965, "rewards/accuracy_reward_step": 0.38671875, "rewards/final_brier_reward_step": 0.6634172201156616, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.8672226667404175, "step": 40 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.518595041322314, "calib/avg_num_step_conf": 4.0390625, "calib/ece": 0.5467490118577075, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.003952569169960474, "calib/gap": -0.008151515151515173, "calib/mean_conf": 0.12036561264822135, "calib/mu_c": 0.11753030303030303, "calib/mu_w": 0.1256818181818182, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.007470355731225296, "calib/std_conf": 0.12852342384790832, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2537.0, "completions/max_terminated_length": 2537.0, "completions/mean_length": 348.51953125, "completions/mean_terminated_length": 348.51953125, "completions/min_length": 89.0, "completions/min_terminated_length": 89.0, "epoch": 0.04373333333333333, "grad_norm": 0.007085585966706276, "learning_rate": 4.416666666666667e-06, "loss": 0.0491, "num_tokens": 8997014.0, "reward": 1.7957595586776733, "reward_std": 0.4599454700946808, "rewards/accuracy_reward_step": 0.64453125, "rewards/final_brier_reward_step": 0.46078142523765564, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.8863190412521362, "step": 41 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.531181969522346, "calib/avg_num_step_conf": 3.80859375, "calib/ece": 0.3637450199203187, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0048187988218721944, "calib/mean_conf": 0.09824701195219124, "calib/mu_c": 0.10087719298245613, "calib/mu_w": 0.09605839416058394, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.003904382470119521, "calib/std_conf": 0.10768532964222766, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1757.0, "completions/max_terminated_length": 1757.0, "completions/mean_length": 317.49609375, "completions/mean_terminated_length": 317.49609375, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 0.0448, "grad_norm": 0.008215104229748249, "learning_rate": 4.388888888888889e-06, "loss": 0.0404, "num_tokens": 9182661.0, "reward": 1.5414741039276123, "reward_std": 0.3999992907047272, "rewards/accuracy_reward_step": 0.453125, "rewards/final_brier_reward_step": 0.6019105315208435, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.8921107053756714, "step": 42 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.6046896903275136, "calib/avg_num_step_conf": 3.7421875, "calib/ece": 0.3801195219123506, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.025647381164776342, "calib/mean_conf": 0.09581673306772909, "calib/mu_c": 0.10940677966101695, "calib/mu_w": 0.0837593984962406, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.002908366533864542, "calib/std_conf": 0.08601648062509853, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2958.0, "completions/max_terminated_length": 2958.0, "completions/mean_length": 413.08984375, "completions/mean_terminated_length": 413.08984375, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 0.04586666666666667, "grad_norm": 0.006696638185530901, "learning_rate": 4.361111111111112e-06, "loss": 0.0935, "num_tokens": 9393636.0, "reward": 1.5401055812835693, "reward_std": 0.4751012325286865, "rewards/accuracy_reward_step": 0.4609375, "rewards/final_brier_reward_step": 0.5967199206352234, "rewards/format_reward_step": 0.96875, "rewards/stepwise_brier_reward": 0.8605777025222778, "step": 43 }, { "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5637673243667646, "calib/avg_num_step_conf": 3.55859375, "calib/ece": 0.2813432795698924, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.004032258064516129, "calib/gap": 0.010013213172208185, "calib/mean_conf": 0.12905994623655914, "calib/mu_c": 0.13515670103092783, "calib/mu_w": 0.12514348785871965, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.009637096774193547, "calib/std_conf": 0.11519578926866243, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2523.0, "completions/max_terminated_length": 2523.0, "completions/mean_length": 407.40234375, "completions/mean_terminated_length": 409.0000305175781, "completions/min_length": 0.0, "completions/min_terminated_length": 137.0, "epoch": 0.046933333333333334, "grad_norm": 0.006890473887324333, "learning_rate": 4.333333333333334e-06, "loss": 0.123, "num_tokens": 9604251.0, "reward": 1.4265779256820679, "reward_std": 0.394540011882782, "rewards/accuracy_reward_step": 0.37890625, "rewards/final_brier_reward_step": 0.6516605615615845, "rewards/format_reward_step": 0.95703125, "rewards/stepwise_brier_reward": 0.8671508431434631, "step": 44 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5166147455867081, "calib/avg_num_step_conf": 3.3828125, "calib/ece": 0.3221752988047809, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.002900960539979236, "calib/mean_conf": 0.11886055776892432, "calib/mu_c": 0.117196261682243, "calib/mu_w": 0.12009722222222223, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.007370517928286852, "calib/std_conf": 0.10039789603361092, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2149.0, "completions/max_terminated_length": 2149.0, "completions/mean_length": 348.75, "completions/mean_terminated_length": 350.11767578125, "completions/min_length": 0.0, "completions/min_terminated_length": 74.0, "epoch": 0.048, "grad_norm": 0.008182951249182224, "learning_rate": 4.305555555555556e-06, "loss": 0.0829, "num_tokens": 9798579.0, "reward": 1.483351230621338, "reward_std": 0.47646263241767883, "rewards/accuracy_reward_step": 0.41796875, "rewards/final_brier_reward_step": 0.6229609251022339, "rewards/format_reward_step": 0.9609375, "rewards/stepwise_brier_reward": 0.8807565569877625, "step": 45 }, { "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.5675109760216144, "calib/avg_num_step_conf": 3.1484375, "calib/ece": 0.282601219512195, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.012485612968591686, "calib/mean_conf": 0.16008170731707316, "calib/mu_c": 0.16723809523809527, "calib/mu_w": 0.15475248226950358, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.007926829268292683, "calib/std_conf": 0.1016763230323449, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2417.0, "completions/max_terminated_length": 2417.0, "completions/mean_length": 388.64453125, "completions/mean_terminated_length": 388.64453125, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 0.04906666666666667, "grad_norm": 0.0073912180960178375, "learning_rate": 4.277777777777778e-06, "loss": 0.1144, "num_tokens": 10002840.0, "reward": 1.4707672595977783, "reward_std": 0.4773736000061035, "rewards/accuracy_reward_step": 0.4140625, "rewards/final_brier_reward_step": 0.6400457620620728, "rewards/format_reward_step": 0.94140625, "rewards/stepwise_brier_reward": 0.875836193561554, "step": 46 }, { "calib/answer_extract_rate": 0.95703125, "calib/auroc": 0.4782161803713528, "calib/avg_num_step_conf": 3.03515625, "calib/ece": 0.3222764227642277, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.93359375, "calib/frac_conf_gt_0.9": 0.008130081300813009, "calib/gap": -0.016023872679045065, "calib/mean_conf": 0.16967479674796748, "calib/mu_c": 0.16120689655172415, "calib/mu_w": 0.17723076923076922, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.9609375, "calib/pce": 0.010203252032520326, "calib/std_conf": 0.12033783717456865, "calib/step_conf_rate": 0.9609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2722.0, "completions/max_terminated_length": 2722.0, "completions/mean_length": 407.453125, "completions/mean_terminated_length": 410.6614074707031, "completions/min_length": 0.0, "completions/min_terminated_length": 101.0, "epoch": 0.050133333333333335, "grad_norm": 0.007095417007803917, "learning_rate": 4.25e-06, "loss": 0.1039, "num_tokens": 10213124.0, "reward": 1.5107771158218384, "reward_std": 0.47904813289642334, "rewards/accuracy_reward_step": 0.453125, "rewards/final_brier_reward_step": 0.5969375371932983, "rewards/format_reward_step": 0.93359375, "rewards/stepwise_brier_reward": 0.8602335453033447, "step": 47 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.4848875661375662, "calib/avg_num_step_conf": 2.63671875, "calib/ece": 0.28132530120481924, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.004016064257028112, "calib/gap": -0.013351190476190405, "calib/mean_conf": 0.17457831325301204, "calib/mu_c": 0.1668571428571429, "calib/mu_w": 0.1802083333333333, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.017108433734939754, "calib/std_conf": 0.10642003470546849, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2849.0, "completions/max_terminated_length": 2849.0, "completions/mean_length": 349.9921875, "completions/mean_terminated_length": 349.9921875, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 0.0512, "grad_norm": 0.008383657783269882, "learning_rate": 4.222222222222223e-06, "loss": 0.1454, "num_tokens": 10406410.0, "reward": 1.4861187934875488, "reward_std": 0.49443191289901733, "rewards/accuracy_reward_step": 0.41015625, "rewards/final_brier_reward_step": 0.6513808369636536, "rewards/format_reward_step": 0.9609375, "rewards/stepwise_brier_reward": 0.910281777381897, "step": 48 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5583248666497332, "calib/avg_num_step_conf": 2.9296875, "calib/ece": 0.2803187250996016, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.02079565659131316, "calib/mean_conf": 0.2164940239043825, "calib/mu_c": 0.22701612903225804, "calib/mu_w": 0.20622047244094488, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.001394422310756969, "calib/std_conf": 0.10320201712557886, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2487.0, "completions/max_terminated_length": 2487.0, "completions/mean_length": 345.34375, "completions/mean_terminated_length": 345.34375, "completions/min_length": 83.0, "completions/min_terminated_length": 83.0, "epoch": 0.05226666666666667, "grad_norm": 0.008482473902404308, "learning_rate": 4.194444444444445e-06, "loss": 0.0898, "num_tokens": 10599354.0, "reward": 1.5967867374420166, "reward_std": 0.4762223958969116, "rewards/accuracy_reward_step": 0.484375, "rewards/final_brier_reward_step": 0.64497971534729, "rewards/format_reward_step": 0.96484375, "rewards/stepwise_brier_reward": 0.9062297344207764, "step": 49 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5345538461538462, "calib/avg_num_step_conf": 2.8125, "calib/ece": 0.2992549019607843, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.012676923076923058, "calib/mean_conf": 0.21486274509803924, "calib/mu_c": 0.22107692307692306, "calib/mu_w": 0.2084, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.0021568627450980395, "calib/std_conf": 0.10195289562242836, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2991.0, "completions/max_terminated_length": 2991.0, "completions/mean_length": 359.57421875, "completions/mean_terminated_length": 359.57421875, "completions/min_length": 98.0, "completions/min_terminated_length": 98.0, "epoch": 0.05333333333333334, "grad_norm": 0.008044109679758549, "learning_rate": 4.166666666666667e-06, "loss": 0.0145, "num_tokens": 10796765.0, "reward": 1.640196681022644, "reward_std": 0.5175546407699585, "rewards/accuracy_reward_step": 0.5078125, "rewards/final_brier_reward_step": 0.6471250057220459, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.9136614799499512, "step": 50 }, { "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.6100285013587857, "calib/avg_num_step_conf": 2.7421875, "calib/ece": 0.21911290322580645, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.03341419765360906, "calib/mean_conf": 0.25016129032258067, "calib/mu_c": 0.2691588785046729, "calib/mu_w": 0.23574468085106384, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.018911290322580647, "calib/std_conf": 0.12980434327794635, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2352.0, "completions/max_terminated_length": 2352.0, "completions/mean_length": 336.10546875, "completions/mean_terminated_length": 338.751953125, "completions/min_length": 0.0, "completions/min_terminated_length": 92.0, "epoch": 0.0544, "grad_norm": 0.008155681192874908, "learning_rate": 4.138888888888889e-06, "loss": 0.0826, "num_tokens": 10992104.0, "reward": 1.497969150543213, "reward_std": 0.4611200988292694, "rewards/accuracy_reward_step": 0.41796875, "rewards/final_brier_reward_step": 0.6864816546440125, "rewards/format_reward_step": 0.953125, "rewards/stepwise_brier_reward": 0.8913326859474182, "step": 51 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6185649599442704, "calib/avg_num_step_conf": 2.609375, "calib/ece": 0.4160317460317461, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.039866248693834955, "calib/mean_conf": 0.2458730158730159, "calib/mu_c": 0.2596363636363636, "calib/mu_w": 0.21977011494252865, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.003571428571428571, "calib/std_conf": 0.11298805837498471, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1465.0, "completions/max_terminated_length": 1465.0, "completions/mean_length": 305.45703125, "completions/mean_terminated_length": 305.45703125, "completions/min_length": 78.0, "completions/min_terminated_length": 78.0, "epoch": 0.055466666666666664, "grad_norm": 0.008880858309566975, "learning_rate": 4.111111111111111e-06, "loss": 0.0706, "num_tokens": 11178253.0, "reward": 1.8421229124069214, "reward_std": 0.402671217918396, "rewards/accuracy_reward_step": 0.6484375, "rewards/final_brier_reward_step": 0.5967246294021606, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.9280170202255249, "step": 52 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5627160968113715, "calib/avg_num_step_conf": 2.57421875, "calib/ece": 0.2903187250996016, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.02710846459213731, "calib/mean_conf": 0.25549800796812755, "calib/mu_c": 0.2678102189781022, "calib/mu_w": 0.2407017543859649, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0, "calib/std_conf": 0.10445481207180506, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2941.0, "completions/max_terminated_length": 2941.0, "completions/mean_length": 302.88671875, "completions/mean_terminated_length": 304.07452392578125, "completions/min_length": 0.0, "completions/min_terminated_length": 66.0, "epoch": 0.05653333333333333, "grad_norm": 0.00845205970108509, "learning_rate": 4.083333333333334e-06, "loss": 0.0155, "num_tokens": 11361616.0, "reward": 1.682370662689209, "reward_std": 0.4656108617782593, "rewards/accuracy_reward_step": 0.53515625, "rewards/final_brier_reward_step": 0.6496828198432922, "rewards/format_reward_step": 0.97265625, "rewards/stepwise_brier_reward": 0.9235502481460571, "step": 53 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5679595612528082, "calib/avg_num_step_conf": 2.71875, "calib/ece": 0.32588235294117646, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.031105457909343215, "calib/mean_conf": 0.3054901960784314, "calib/mu_c": 0.3169565217391304, "calib/mu_w": 0.2858510638297872, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0, "calib/std_conf": 0.12430692132056384, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 582.0, "completions/max_terminated_length": 582.0, "completions/mean_length": 264.8515625, "completions/mean_terminated_length": 265.89019775390625, "completions/min_length": 0.0, "completions/min_terminated_length": 57.0, "epoch": 0.0576, "grad_norm": 0.010827116668224335, "learning_rate": 4.055555555555556e-06, "loss": -0.0316, "num_tokens": 11535650.0, "reward": 1.833878517150879, "reward_std": 0.3862270414829254, "rewards/accuracy_reward_step": 0.62890625, "rewards/final_brier_reward_step": 0.6499722599983215, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.9355419874191284, "step": 54 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5752223966509681, "calib/avg_num_step_conf": 2.56640625, "calib/ece": 0.08820717131474104, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.00398406374501992, "calib/gap": 0.027567373103087323, "calib/mean_conf": 0.3441434262948207, "calib/mu_c": 0.3602884615384615, "calib/mu_w": 0.3327210884353742, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.009003984063745026, "calib/std_conf": 0.12284623947196727, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1956.0, "completions/max_terminated_length": 1956.0, "completions/mean_length": 309.7421875, "completions/mean_terminated_length": 309.7421875, "completions/min_length": 68.0, "completions/min_terminated_length": 68.0, "epoch": 0.058666666666666666, "grad_norm": 0.009377531707286835, "learning_rate": 4.027777777777779e-06, "loss": 0.075, "num_tokens": 11722768.0, "reward": 1.5037016868591309, "reward_std": 0.5124849081039429, "rewards/accuracy_reward_step": 0.40625, "rewards/final_brier_reward_step": 0.7266761660575867, "rewards/format_reward_step": 0.96875, "rewards/stepwise_brier_reward": 0.913130521774292, "step": 55 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5741666666666666, "calib/avg_num_step_conf": 2.83203125, "calib/ece": 0.08925781250000002, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.03299743589743592, "calib/mean_conf": 0.3869921875, "calib/mu_c": 0.4071, "calib/mu_w": 0.3741025641025641, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.04281250000000002, "calib/std_conf": 0.13304111183752504, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1018.0, "completions/max_terminated_length": 1018.0, "completions/mean_length": 317.36328125, "completions/mean_terminated_length": 318.60784912109375, "completions/min_length": 0.0, "completions/min_terminated_length": 107.0, "epoch": 0.05973333333333333, "grad_norm": 0.008900254033505917, "learning_rate": 4.000000000000001e-06, "loss": -0.0154, "num_tokens": 11910853.0, "reward": 1.5095279216766357, "reward_std": 0.46018385887145996, "rewards/accuracy_reward_step": 0.390625, "rewards/final_brier_reward_step": 0.7599589824676514, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.934402585029602, "step": 56 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5446467408189897, "calib/avg_num_step_conf": 2.96875, "calib/ece": 0.11055118110236216, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.02669110793512708, "calib/mean_conf": 0.4231496062992126, "calib/mu_c": 0.43586466165413534, "calib/mu_w": 0.40917355371900826, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.005039370078740154, "calib/std_conf": 0.12755854476421674, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 978.0, "completions/max_terminated_length": 978.0, "completions/mean_length": 314.765625, "completions/mean_terminated_length": 316.0000305175781, "completions/min_length": 0.0, "completions/min_terminated_length": 76.0, "epoch": 0.0608, "grad_norm": 0.008326790295541286, "learning_rate": 3.972222222222223e-06, "loss": 0.0128, "num_tokens": 12098225.0, "reward": 1.6808912754058838, "reward_std": 0.5460004806518555, "rewards/accuracy_reward_step": 0.51953125, "rewards/final_brier_reward_step": 0.7244125008583069, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.9132156372070312, "step": 57 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6601914414414415, "calib/avg_num_step_conf": 3.140625, "calib/ece": 0.05435294117647061, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.06885510510510517, "calib/mean_conf": 0.43733333333333335, "calib/mu_c": 0.47621621621621624, "calib/mu_w": 0.40736111111111106, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.028196078431372566, "calib/std_conf": 0.1259565232595517, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1952.0, "completions/max_terminated_length": 1952.0, "completions/mean_length": 348.11328125, "completions/mean_terminated_length": 348.11328125, "completions/min_length": 63.0, "completions/min_terminated_length": 63.0, "epoch": 0.06186666666666667, "grad_norm": 0.008115312084555626, "learning_rate": 3.944444444444445e-06, "loss": -0.0317, "num_tokens": 12293662.0, "reward": 1.556930422782898, "reward_std": 0.4506745934486389, "rewards/accuracy_reward_step": 0.43359375, "rewards/final_brier_reward_step": 0.7560132741928101, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.9092081785202026, "step": 58 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5198412698412698, "calib/avg_num_step_conf": 3.44140625, "calib/ece": 0.09462745098039219, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.00392156862745098, "calib/gap": 0.01144333702473238, "calib/mean_conf": 0.49627450980392157, "calib/mu_c": 0.5020634920634921, "calib/mu_w": 0.4906201550387597, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.04839215686274512, "calib/std_conf": 0.12670733275457785, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2108.0, "completions/max_terminated_length": 2108.0, "completions/mean_length": 342.8125, "completions/mean_terminated_length": 342.8125, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 0.06293333333333333, "grad_norm": 0.008273365907371044, "learning_rate": 3.916666666666667e-06, "loss": 0.0249, "num_tokens": 12487670.0, "reward": 1.6497235298156738, "reward_std": 0.4084717035293579, "rewards/accuracy_reward_step": 0.4921875, "rewards/final_brier_reward_step": 0.7368066310882568, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9167747497558594, "step": 59 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6054436480474542, "calib/avg_num_step_conf": 3.33984375, "calib/ece": 0.05843137254901969, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.042457983193277204, "calib/mean_conf": 0.47113725490196084, "calib/mu_c": 0.493781512605042, "calib/mu_w": 0.4513235294117648, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.03145098039215689, "calib/std_conf": 0.12651544424172143, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1848.0, "completions/max_terminated_length": 1848.0, "completions/mean_length": 357.58203125, "completions/mean_terminated_length": 357.58203125, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 0.064, "grad_norm": 0.008037107065320015, "learning_rate": 3.88888888888889e-06, "loss": 0.0368, "num_tokens": 12688067.0, "reward": 1.6102734804153442, "reward_std": 0.514053225517273, "rewards/accuracy_reward_step": 0.46484375, "rewards/final_brier_reward_step": 0.749447226524353, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9182088375091553, "step": 60 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5427588364091336, "calib/avg_num_step_conf": 3.59375, "calib/ece": 0.05295275590551177, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.008005630278385967, "calib/mean_conf": 0.5275984251968504, "calib/mu_c": 0.5312230215827338, "calib/mu_w": 0.5232173913043479, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.016653543307086578, "calib/std_conf": 0.1110656158365047, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1936.0, "completions/max_terminated_length": 1936.0, "completions/mean_length": 325.60546875, "completions/mean_terminated_length": 325.60546875, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 0.06506666666666666, "grad_norm": 0.00897640734910965, "learning_rate": 3.861111111111112e-06, "loss": 0.0316, "num_tokens": 12875486.0, "reward": 1.7183257341384888, "reward_std": 0.4480532705783844, "rewards/accuracy_reward_step": 0.54296875, "rewards/final_brier_reward_step": 0.7368570566177368, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.9020707607269287, "step": 61 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.6227478194435603, "calib/avg_num_step_conf": 3.62109375, "calib/ece": 0.09615079365079357, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.04824027503660788, "calib/mean_conf": 0.5332142857142858, "calib/mu_c": 0.5598230088495575, "calib/mu_w": 0.5115827338129496, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.09047619047619039, "calib/std_conf": 0.11200014931557913, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2175.0, "completions/max_terminated_length": 2175.0, "completions/mean_length": 385.828125, "completions/mean_terminated_length": 385.828125, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 0.06613333333333334, "grad_norm": 0.007599617820233107, "learning_rate": 3.833333333333334e-06, "loss": 0.0473, "num_tokens": 13081338.0, "reward": 1.5627398490905762, "reward_std": 0.5017718076705933, "rewards/accuracy_reward_step": 0.44140625, "rewards/final_brier_reward_step": 0.7422398328781128, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.8993446826934814, "step": 62 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5769374068554396, "calib/avg_num_step_conf": 4.07421875, "calib/ece": 0.06889763779527565, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.007874015748031496, "calib/gap": 0.03668777943368107, "calib/mean_conf": 0.5392125984251969, "calib/mu_c": 0.5582786885245901, "calib/mu_w": 0.521590909090909, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.06389763779527564, "calib/std_conf": 0.12531915978725527, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1769.0, "completions/max_terminated_length": 1769.0, "completions/mean_length": 407.23828125, "completions/mean_terminated_length": 408.8353271484375, "completions/min_length": 0.0, "completions/min_terminated_length": 133.0, "epoch": 0.0672, "grad_norm": 0.0076432013884186745, "learning_rate": 3.8055555555555556e-06, "loss": 0.0064, "num_tokens": 13294231.0, "reward": 1.6152989864349365, "reward_std": 0.4999202489852905, "rewards/accuracy_reward_step": 0.4765625, "rewards/final_brier_reward_step": 0.7365000247955322, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.8965712785720825, "step": 63 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5838155626692932, "calib/avg_num_step_conf": 3.890625, "calib/ece": 0.04698039215686273, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.00784313725490196, "calib/gap": 0.028400640236395103, "calib/mean_conf": 0.540235294117647, "calib/mu_c": 0.5540458015267177, "calib/mu_w": 0.5256451612903226, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.036745098039215676, "calib/std_conf": 0.11822188978743951, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2032.0, "completions/max_terminated_length": 2032.0, "completions/mean_length": 384.0390625, "completions/mean_terminated_length": 384.0390625, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 0.06826666666666667, "grad_norm": 0.00764108169823885, "learning_rate": 3.777777777777778e-06, "loss": 0.0416, "num_tokens": 13496321.0, "reward": 1.6733813285827637, "reward_std": 0.5097801685333252, "rewards/accuracy_reward_step": 0.51171875, "rewards/final_brier_reward_step": 0.7405202984809875, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.9061299562454224, "step": 64 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5989147286821705, "calib/avg_num_step_conf": 3.94140625, "calib/ece": 0.10870472440944882, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.044506356589147256, "calib/mean_conf": 0.565476377952756, "calib/mu_c": 0.58808, "calib/mu_w": 0.5435736434108528, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.09102755905511811, "calib/std_conf": 0.11349812441246987, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2238.0, "completions/max_terminated_length": 2238.0, "completions/mean_length": 358.71484375, "completions/mean_terminated_length": 358.71484375, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 0.06933333333333333, "grad_norm": 0.008037811145186424, "learning_rate": 3.7500000000000005e-06, "loss": 0.0679, "num_tokens": 13693176.0, "reward": 1.6414910554885864, "reward_std": 0.3687446713447571, "rewards/accuracy_reward_step": 0.48828125, "rewards/final_brier_reward_step": 0.7481565475463867, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9037454128265381, "step": 65 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5864916165770326, "calib/avg_num_step_conf": 4.12890625, "calib/ece": 0.13609842519685034, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.011811023622047244, "calib/gap": 0.033580196140461704, "calib/mean_conf": 0.5574448818897638, "calib/mu_c": 0.5766146788990825, "calib/mu_w": 0.5430344827586208, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.13220472440944878, "calib/std_conf": 0.1300869292693095, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2106.0, "completions/max_terminated_length": 2106.0, "completions/mean_length": 418.9765625, "completions/mean_terminated_length": 418.9765625, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 0.0704, "grad_norm": 0.007899136282503605, "learning_rate": 3.7222222222222225e-06, "loss": 0.0477, "num_tokens": 13906786.0, "reward": 1.5405974388122559, "reward_std": 0.4160727858543396, "rewards/accuracy_reward_step": 0.42578125, "rewards/final_brier_reward_step": 0.729138195514679, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.9020017385482788, "step": 66 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5375972611266729, "calib/avg_num_step_conf": 4.43359375, "calib/ece": 0.08854330708661423, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.007874015748031496, "calib/gap": 0.013299097416744399, "calib/mean_conf": 0.586732283464567, "calib/mu_c": 0.5929629629629629, "calib/mu_w": 0.5796638655462185, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.07188976377952758, "calib/std_conf": 0.12793003432418582, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2491.0, "completions/max_terminated_length": 2491.0, "completions/mean_length": 441.52734375, "completions/mean_terminated_length": 441.52734375, "completions/min_length": 180.0, "completions/min_terminated_length": 180.0, "epoch": 0.07146666666666666, "grad_norm": 0.007587770000100136, "learning_rate": 3.694444444444445e-06, "loss": 0.0066, "num_tokens": 14124825.0, "reward": 1.6739743947982788, "reward_std": 0.5032845735549927, "rewards/accuracy_reward_step": 0.52734375, "rewards/final_brier_reward_step": 0.7150827646255493, "rewards/format_reward_step": 0.96875, "rewards/stepwise_brier_reward": 0.8792521953582764, "step": 67 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5175111773472428, "calib/avg_num_step_conf": 4.24609375, "calib/ece": 0.0871653543307086, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.003937007874015748, "calib/gap": 0.005992300049677213, "calib/mean_conf": 0.5740157480314961, "calib/mu_c": 0.5768939393939394, "calib/mu_w": 0.5709016393442622, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.07074803149606293, "calib/std_conf": 0.11021996505570755, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2075.0, "completions/max_terminated_length": 2075.0, "completions/mean_length": 425.32421875, "completions/mean_terminated_length": 425.32421875, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 0.07253333333333334, "grad_norm": 0.007506520953029394, "learning_rate": 3.6666666666666666e-06, "loss": 0.0285, "num_tokens": 14337796.0, "reward": 1.6663700342178345, "reward_std": 0.41317036747932434, "rewards/accuracy_reward_step": 0.515625, "rewards/final_brier_reward_step": 0.7226867079734802, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.8881058096885681, "step": 68 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5355238095238095, "calib/avg_num_step_conf": 4.40625, "calib/ece": 0.11617529880478081, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.00796812749003984, "calib/gap": 0.016096507936508075, "calib/mean_conf": 0.5799203187250996, "calib/mu_c": 0.5879365079365081, "calib/mu_w": 0.57184, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0970517928286852, "calib/std_conf": 0.1241480674633258, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2173.0, "completions/max_terminated_length": 2173.0, "completions/mean_length": 487.3203125, "completions/mean_terminated_length": 487.3203125, "completions/min_length": 185.0, "completions/min_terminated_length": 185.0, "epoch": 0.0736, "grad_norm": 0.007550784852355719, "learning_rate": 3.638888888888889e-06, "loss": 0.0568, "num_tokens": 14567046.0, "reward": 1.6235175132751465, "reward_std": 0.47181078791618347, "rewards/accuracy_reward_step": 0.4921875, "rewards/final_brier_reward_step": 0.7165261507034302, "rewards/format_reward_step": 0.97265625, "rewards/stepwise_brier_reward": 0.879106342792511, "step": 69 }, { "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.5996880509968807, "calib/avg_num_step_conf": 4.359375, "calib/ece": 0.16271255060728745, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.9453125, "calib/frac_conf_gt_0.9": 0.008097165991902834, "calib/gap": 0.043323613183236076, "calib/mean_conf": 0.5716194331983806, "calib/mu_c": 0.5972277227722772, "calib/mu_w": 0.5539041095890411, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.16271255060728745, "calib/std_conf": 0.11653485910890778, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2400.0, "completions/max_terminated_length": 2400.0, "completions/mean_length": 522.46484375, "completions/mean_terminated_length": 522.46484375, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 0.07466666666666667, "grad_norm": 0.0067598153837025166, "learning_rate": 3.6111111111111115e-06, "loss": 0.0411, "num_tokens": 14807789.0, "reward": 1.452558159828186, "reward_std": 0.5075742602348328, "rewards/accuracy_reward_step": 0.39453125, "rewards/final_brier_reward_step": 0.69819176197052, "rewards/format_reward_step": 0.9453125, "rewards/stepwise_brier_reward": 0.8542283773422241, "step": 70 }, { "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5672232855917101, "calib/avg_num_step_conf": 4.79296875, "calib/ece": 0.16580645161290325, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.03225806451612903, "calib/gap": 0.024713880271929334, "calib/mean_conf": 0.6053225806451613, "calib/mu_c": 0.6191743119266055, "calib/mu_w": 0.5944604316546762, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.16580645161290325, "calib/std_conf": 0.12702828922845497, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2405.0, "completions/max_terminated_length": 2405.0, "completions/mean_length": 526.90625, "completions/mean_terminated_length": 528.9725952148438, "completions/min_length": 0.0, "completions/min_terminated_length": 209.0, "epoch": 0.07573333333333333, "grad_norm": 0.006880332250148058, "learning_rate": 3.5833333333333335e-06, "loss": 0.0742, "num_tokens": 15047085.0, "reward": 1.5022592544555664, "reward_std": 0.5741759538650513, "rewards/accuracy_reward_step": 0.42578125, "rewards/final_brier_reward_step": 0.6890460848808289, "rewards/format_reward_step": 0.953125, "rewards/stepwise_brier_reward": 0.8590537309646606, "step": 71 }, { "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5645356234096692, "calib/avg_num_step_conf": 4.21875, "calib/ece": 0.11254980079681268, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.01195219123505976, "calib/gap": 0.03358651399491086, "calib/mean_conf": 0.5906374501992032, "calib/mu_c": 0.6081666666666666, "calib/mu_w": 0.5745801526717558, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.11254980079681268, "calib/std_conf": 0.1300735006432676, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2470.0, "completions/max_terminated_length": 2470.0, "completions/mean_length": 491.3984375, "completions/mean_terminated_length": 495.2677001953125, "completions/min_length": 0.0, "completions/min_terminated_length": 194.0, "epoch": 0.0768, "grad_norm": 0.007533502299338579, "learning_rate": 3.555555555555556e-06, "loss": 0.0568, "num_tokens": 15277291.0, "reward": 1.5845632553100586, "reward_std": 0.4872770607471466, "rewards/accuracy_reward_step": 0.46875, "rewards/final_brier_reward_step": 0.7165089845657349, "rewards/format_reward_step": 0.96875, "rewards/stepwise_brier_reward": 0.8717440366744995, "step": 72 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5071045743487474, "calib/avg_num_step_conf": 4.3984375, "calib/ece": 0.14078431372549027, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.00784313725490196, "calib/gap": 0.0045431883335410594, "calib/mean_conf": 0.6023529411764705, "calib/mu_c": 0.6043661971830986, "calib/mu_w": 0.5998230088495575, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0931372549019608, "calib/std_conf": 0.11657848042924825, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2376.0, "completions/max_terminated_length": 2376.0, "completions/mean_length": 485.21875, "completions/mean_terminated_length": 485.21875, "completions/min_length": 203.0, "completions/min_terminated_length": 203.0, "epoch": 0.07786666666666667, "grad_norm": 0.007058590184897184, "learning_rate": 3.5277777777777784e-06, "loss": 0.0067, "num_tokens": 15508539.0, "reward": 1.7351597547531128, "reward_std": 0.43965622782707214, "rewards/accuracy_reward_step": 0.5546875, "rewards/final_brier_reward_step": 0.7347210645675659, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8934180736541748, "step": 73 }, { "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.528050896471949, "calib/avg_num_step_conf": 4.24609375, "calib/ece": 0.12879999999999994, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.004, "calib/gap": 0.012280701754386003, "calib/mean_conf": 0.5968000000000001, "calib/mu_c": 0.6033333333333334, "calib/mu_w": 0.5910526315789474, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.12879999999999994, "calib/std_conf": 0.11661286378440416, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1897.0, "completions/max_terminated_length": 1897.0, "completions/mean_length": 489.35546875, "completions/mean_terminated_length": 493.2086486816406, "completions/min_length": 0.0, "completions/min_terminated_length": 192.0, "epoch": 0.07893333333333333, "grad_norm": 0.00773122813552618, "learning_rate": 3.5e-06, "loss": 0.0199, "num_tokens": 15737742.0, "reward": 1.5563820600509644, "reward_std": 0.42694351077079773, "rewards/accuracy_reward_step": 0.45703125, "rewards/final_brier_reward_step": 0.6976984739303589, "rewards/format_reward_step": 0.9609375, "rewards/stepwise_brier_reward": 0.8637673854827881, "step": 74 }, { "calib/answer_extract_rate": 0.95703125, "calib/auroc": 0.5132485029940119, "calib/avg_num_step_conf": 4.44921875, "calib/ece": 0.11716599190283394, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.9453125, "calib/frac_conf_gt_0.9": 0.020242914979757085, "calib/gap": 0.00283532934131725, "calib/mean_conf": 0.6114170040485829, "calib/mu_c": 0.6123353293413173, "calib/mu_w": 0.6095, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.026234817813765153, "calib/std_conf": 0.13058268741825027, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2520.0, "completions/max_terminated_length": 2520.0, "completions/mean_length": 512.6015625, "completions/mean_terminated_length": 512.6015625, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 0.08, "grad_norm": 0.007090203929692507, "learning_rate": 3.4722222222222224e-06, "loss": 0.0481, "num_tokens": 15973720.0, "reward": 1.843395709991455, "reward_std": 0.4259081482887268, "rewards/accuracy_reward_step": 0.65234375, "rewards/final_brier_reward_step": 0.7201793193817139, "rewards/format_reward_step": 0.9453125, "rewards/stepwise_brier_reward": 0.848715603351593, "step": 75 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.6342339478703115, "calib/avg_num_step_conf": 4.2734375, "calib/ece": 0.0597233201581028, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.011857707509881422, "calib/gap": 0.05968531468531457, "calib/mean_conf": 0.59600790513834, "calib/mu_c": 0.6219580419580419, "calib/mu_w": 0.5622727272727274, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.045256916996047475, "calib/std_conf": 0.12177975298835207, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2387.0, "completions/max_terminated_length": 2387.0, "completions/mean_length": 543.16796875, "completions/mean_terminated_length": 543.16796875, "completions/min_length": 214.0, "completions/min_terminated_length": 214.0, "epoch": 0.08106666666666666, "grad_norm": 0.007007664535194635, "learning_rate": 3.444444444444445e-06, "loss": 0.0547, "num_tokens": 16215827.0, "reward": 1.7331316471099854, "reward_std": 0.4490346610546112, "rewards/accuracy_reward_step": 0.55859375, "rewards/final_brier_reward_step": 0.750145673751831, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.877693772315979, "step": 76 }, { "calib/answer_extract_rate": 0.93359375, "calib/auroc": 0.5592497868712702, "calib/avg_num_step_conf": 4.8203125, "calib/ece": 0.10116666666666665, "calib/final_conf_rate": 0.9375, "calib/format_rate": 0.9296875, "calib/frac_conf_gt_0.9": 0.0625, "calib/gap": 0.03336743393009378, "calib/mean_conf": 0.6318333333333332, "calib/mu_c": 0.6460144927536232, "calib/mu_w": 0.6126470588235294, "calib/nonempty_final_conf_rate": 0.9375, "calib/nonempty_reasoning_rate": 0.9765625, "calib/nonempty_step_conf_rate": 0.97265625, "calib/pce": 0.07900000000000001, "calib/std_conf": 0.14462297727386045, "calib/step_conf_rate": 0.97265625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2978.0, "completions/max_terminated_length": 2978.0, "completions/mean_length": 559.16796875, "completions/mean_terminated_length": 565.7984619140625, "completions/min_length": 0.0, "completions/min_terminated_length": 196.0, "epoch": 0.08213333333333334, "grad_norm": 0.007006470579653978, "learning_rate": 3.416666666666667e-06, "loss": 0.1127, "num_tokens": 16463638.0, "reward": 1.654437780380249, "reward_std": 0.4745751917362213, "rewards/accuracy_reward_step": 0.5390625, "rewards/final_brier_reward_step": 0.6986671686172485, "rewards/format_reward_step": 0.9296875, "rewards/stepwise_brier_reward": 0.8253339529037476, "step": 77 }, { "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5848951507208388, "calib/avg_num_step_conf": 4.58203125, "calib/ece": 0.06309036144578307, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.028112449799196786, "calib/gap": 0.036206716906946323, "calib/mean_conf": 0.6094397590361446, "calib/mu_c": 0.6252892857142858, "calib/mu_w": 0.5890825688073394, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.97265625, "calib/pce": 0.055140562248995946, "calib/std_conf": 0.13684808357244896, "calib/step_conf_rate": 0.97265625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2823.0, "completions/max_terminated_length": 2823.0, "completions/mean_length": 601.453125, "completions/mean_terminated_length": 601.453125, "completions/min_length": 245.0, "completions/min_terminated_length": 245.0, "epoch": 0.0832, "grad_norm": 0.006692703813314438, "learning_rate": 3.3888888888888893e-06, "loss": 0.0732, "num_tokens": 16725634.0, "reward": 1.694908618927002, "reward_std": 0.4652491807937622, "rewards/accuracy_reward_step": 0.546875, "rewards/final_brier_reward_step": 0.7221781015396118, "rewards/format_reward_step": 0.9609375, "rewards/stepwise_brier_reward": 0.8543316125869751, "step": 78 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.584365741987611, "calib/avg_num_step_conf": 4.5546875, "calib/ece": 0.06428571428571424, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.03968253968253968, "calib/gap": 0.04562079181255041, "calib/mean_conf": 0.6375396825396825, "calib/mu_c": 0.6545569620253164, "calib/mu_w": 0.608936170212766, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.037420634920634875, "calib/std_conf": 0.13801130910606224, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1469.0, "completions/max_terminated_length": 1469.0, "completions/mean_length": 541.921875, "completions/mean_terminated_length": 546.18896484375, "completions/min_length": 0.0, "completions/min_terminated_length": 235.0, "epoch": 0.08426666666666667, "grad_norm": 0.006624994333833456, "learning_rate": 3.3611111111111117e-06, "loss": -0.0001, "num_tokens": 16970742.0, "reward": 1.8090747594833374, "reward_std": 0.46922507882118225, "rewards/accuracy_reward_step": 0.62109375, "rewards/final_brier_reward_step": 0.7410902976989746, "rewards/format_reward_step": 0.9609375, "rewards/stepwise_brier_reward": 0.846771240234375, "step": 79 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6140056022408963, "calib/avg_num_step_conf": 5.21484375, "calib/ece": 0.08090551181102364, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.09055118110236221, "calib/gap": 0.05761624649859942, "calib/mean_conf": 0.6861811023622046, "calib/mu_c": 0.7052352941176471, "calib/mu_w": 0.6476190476190476, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.04889763779527561, "calib/std_conf": 0.1526510568410527, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2633.0, "completions/max_terminated_length": 2633.0, "completions/mean_length": 512.46875, "completions/mean_terminated_length": 514.4784545898438, "completions/min_length": 0.0, "completions/min_terminated_length": 228.0, "epoch": 0.08533333333333333, "grad_norm": 0.007300146389752626, "learning_rate": 3.3333333333333333e-06, "loss": 0.0112, "num_tokens": 17204094.0, "reward": 1.8976236581802368, "reward_std": 0.3876197636127472, "rewards/accuracy_reward_step": 0.6640625, "rewards/final_brier_reward_step": 0.7715495824813843, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.8580073714256287, "step": 80 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5120606020333577, "calib/avg_num_step_conf": 4.8203125, "calib/ece": 0.10596000000000005, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.056, "calib/gap": -0.0010200013289919374, "calib/mean_conf": 0.6486000000000001, "calib/mu_c": 0.6481879194630873, "calib/mu_w": 0.6492079207920792, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.07928000000000007, "calib/std_conf": 0.13323978384851873, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2699.0, "completions/max_terminated_length": 2699.0, "completions/mean_length": 593.37890625, "completions/mean_terminated_length": 593.37890625, "completions/min_length": 255.0, "completions/min_terminated_length": 255.0, "epoch": 0.0864, "grad_norm": 0.006872696802020073, "learning_rate": 3.3055555555555558e-06, "loss": 0.0943, "num_tokens": 17462247.0, "reward": 1.7374032735824585, "reward_std": 0.5122434496879578, "rewards/accuracy_reward_step": 0.58203125, "rewards/final_brier_reward_step": 0.7063796520233154, "rewards/format_reward_step": 0.95703125, "rewards/stepwise_brier_reward": 0.8369834423065186, "step": 81 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.4951629658485911, "calib/avg_num_step_conf": 4.859375, "calib/ece": 0.09696047430830036, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.02766798418972332, "calib/gap": 0.0017204908453447576, "calib/mean_conf": 0.6391739130434783, "calib/mu_c": 0.6398675496688742, "calib/mu_w": 0.6381470588235294, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.06964822134387352, "calib/std_conf": 0.12718397784570376, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1997.0, "completions/max_terminated_length": 1997.0, "completions/mean_length": 513.69921875, "completions/mean_terminated_length": 515.7137451171875, "completions/min_length": 0.0, "completions/min_terminated_length": 240.0, "epoch": 0.08746666666666666, "grad_norm": 0.007474968209862709, "learning_rate": 3.277777777777778e-06, "loss": 0.0468, "num_tokens": 17699306.0, "reward": 1.7570503950119019, "reward_std": 0.4566406011581421, "rewards/accuracy_reward_step": 0.58984375, "rewards/final_brier_reward_step": 0.7145159244537354, "rewards/format_reward_step": 0.9609375, "rewards/stepwise_brier_reward": 0.852747917175293, "step": 82 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5002272727272726, "calib/avg_num_step_conf": 4.76953125, "calib/ece": 0.147916, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.052, "calib/gap": 0.0014292207792204747, "calib/mean_conf": 0.652164, "calib/mu_c": 0.6527928571428571, "calib/mu_w": 0.6513636363636366, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.96875, "calib/pce": 0.12004000000000001, "calib/std_conf": 0.14608970225173298, "calib/step_conf_rate": 0.96875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2585.0, "completions/max_terminated_length": 2585.0, "completions/mean_length": 565.4765625, "completions/mean_terminated_length": 565.4765625, "completions/min_length": 195.0, "completions/min_terminated_length": 195.0, "epoch": 0.08853333333333334, "grad_norm": 0.006757485214620829, "learning_rate": 3.2500000000000002e-06, "loss": 0.0716, "num_tokens": 17951332.0, "reward": 1.6795960664749146, "reward_std": 0.4769311249256134, "rewards/accuracy_reward_step": 0.546875, "rewards/final_brier_reward_step": 0.6928104758262634, "rewards/format_reward_step": 0.953125, "rewards/stepwise_brier_reward": 0.8380734324455261, "step": 83 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6134135060129509, "calib/avg_num_step_conf": 4.59375, "calib/ece": 0.10415820312500004, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0234375, "calib/gap": 0.050948041936478505, "calib/mean_conf": 0.604669921875, "calib/mu_c": 0.6275567375886525, "calib/mu_w": 0.576608695652174, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.07902343750000004, "calib/std_conf": 0.14945810134028834, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1380.0, "completions/max_terminated_length": 1380.0, "completions/mean_length": 497.9765625, "completions/mean_terminated_length": 499.929443359375, "completions/min_length": 0.0, "completions/min_terminated_length": 168.0, "epoch": 0.0896, "grad_norm": 0.007209381554275751, "learning_rate": 3.2222222222222227e-06, "loss": -0.0137, "num_tokens": 18184734.0, "reward": 1.7335989475250244, "reward_std": 0.4257844388484955, "rewards/accuracy_reward_step": 0.55078125, "rewards/final_brier_reward_step": 0.7492986917495728, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.8882222175598145, "step": 84 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5502873563218391, "calib/avg_num_step_conf": 5.04296875, "calib/ece": 0.1505458167330678, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.05179282868525897, "calib/gap": 0.04549584929757344, "calib/mean_conf": 0.659788844621514, "calib/mu_c": 0.6808148148148148, "calib/mu_w": 0.6353189655172413, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.1362430278884463, "calib/std_conf": 0.15400511334146827, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2728.0, "completions/max_terminated_length": 2728.0, "completions/mean_length": 549.375, "completions/mean_terminated_length": 553.7008056640625, "completions/min_length": 0.0, "completions/min_terminated_length": 197.0, "epoch": 0.09066666666666667, "grad_norm": 0.007047437597066164, "learning_rate": 3.1944444444444443e-06, "loss": 0.0244, "num_tokens": 18433198.0, "reward": 1.6656556129455566, "reward_std": 0.42222103476524353, "rewards/accuracy_reward_step": 0.52734375, "rewards/final_brier_reward_step": 0.7122802138328552, "rewards/format_reward_step": 0.96875, "rewards/stepwise_brier_reward": 0.8487800359725952, "step": 85 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5687830687830687, "calib/avg_num_step_conf": 5.08984375, "calib/ece": 0.1667450980392158, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.043137254901960784, "calib/gap": 0.03538252122554442, "calib/mean_conf": 0.6458823529411765, "calib/mu_c": 0.663781746031746, "calib/mu_w": 0.6283992248062016, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.15925490196078443, "calib/std_conf": 0.1644900732518082, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1466.0, "completions/max_terminated_length": 1466.0, "completions/mean_length": 530.42578125, "completions/mean_terminated_length": 532.5059204101562, "completions/min_length": 0.0, "completions/min_terminated_length": 180.0, "epoch": 0.09173333333333333, "grad_norm": 0.008088787086308002, "learning_rate": 3.1666666666666667e-06, "loss": 0.01, "num_tokens": 18674499.0, "reward": 1.6263952255249023, "reward_std": 0.4225319027900696, "rewards/accuracy_reward_step": 0.4921875, "rewards/final_brier_reward_step": 0.7082681655883789, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.8676250576972961, "step": 86 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5656147316538884, "calib/avg_num_step_conf": 4.8046875, "calib/ece": 0.10925196850393704, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.015748031496062992, "calib/gap": 0.021319824753559535, "calib/mean_conf": 0.6248425196850393, "calib/mu_c": 0.6322289156626505, "calib/mu_w": 0.610909090909091, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.04027559055118113, "calib/std_conf": 0.1470329078381842, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2272.0, "completions/max_terminated_length": 2272.0, "completions/mean_length": 491.3046875, "completions/mean_terminated_length": 493.2314147949219, "completions/min_length": 0.0, "completions/min_terminated_length": 175.0, "epoch": 0.0928, "grad_norm": 0.008023228496313095, "learning_rate": 3.138888888888889e-06, "loss": 0.0066, "num_tokens": 18905769.0, "reward": 1.877850890159607, "reward_std": 0.45576608180999756, "rewards/accuracy_reward_step": 0.6484375, "rewards/final_brier_reward_step": 0.7548441290855408, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8815594911575317, "step": 87 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5912598026815076, "calib/avg_num_step_conf": 5.22265625, "calib/ece": 0.1577380952380954, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.09126984126984126, "calib/gap": 0.05075006324310638, "calib/mean_conf": 0.6775793650793651, "calib/mu_c": 0.7013432835820895, "calib/mu_w": 0.6505932203389831, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.1517857142857144, "calib/std_conf": 0.1594935363757059, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2424.0, "completions/max_terminated_length": 2424.0, "completions/mean_length": 540.6640625, "completions/mean_terminated_length": 542.7843627929688, "completions/min_length": 0.0, "completions/min_terminated_length": 218.0, "epoch": 0.09386666666666667, "grad_norm": 0.006685222499072552, "learning_rate": 3.1111111111111116e-06, "loss": 0.0279, "num_tokens": 19154027.0, "reward": 1.6725934743881226, "reward_std": 0.4166548252105713, "rewards/accuracy_reward_step": 0.5234375, "rewards/final_brier_reward_step": 0.7181754112243652, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.8628235459327698, "step": 88 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6243885613947071, "calib/avg_num_step_conf": 5.3515625, "calib/ece": 0.1483280632411068, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.07114624505928854, "calib/gap": 0.058687256992349335, "calib/mean_conf": 0.6623438735177865, "calib/mu_c": 0.6899477611940299, "calib/mu_w": 0.6312605042016806, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.14051383399209497, "calib/std_conf": 0.1611301070417088, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2196.0, "completions/max_terminated_length": 2196.0, "completions/mean_length": 560.203125, "completions/mean_terminated_length": 560.203125, "completions/min_length": 201.0, "completions/min_terminated_length": 201.0, "epoch": 0.09493333333333333, "grad_norm": 0.0073220268823206425, "learning_rate": 3.0833333333333336e-06, "loss": 0.062, "num_tokens": 19406327.0, "reward": 1.6657295227050781, "reward_std": 0.4641486406326294, "rewards/accuracy_reward_step": 0.5234375, "rewards/final_brier_reward_step": 0.717314600944519, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.8518532514572144, "step": 89 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5211285834465799, "calib/avg_num_step_conf": 5.5, "calib/ece": 0.15087795275590554, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.1141732283464567, "calib/gap": 0.004715977480100997, "calib/mean_conf": 0.7173110236220472, "calib/mu_c": 0.719186274509804, "calib/mu_w": 0.714470297029703, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.1329133858267717, "calib/std_conf": 0.15895752823943748, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2126.0, "completions/max_terminated_length": 2126.0, "completions/mean_length": 525.265625, "completions/mean_terminated_length": 527.3255004882812, "completions/min_length": 0.0, "completions/min_terminated_length": 163.0, "epoch": 0.096, "grad_norm": 0.007360328454524279, "learning_rate": 3.055555555555556e-06, "loss": 0.0176, "num_tokens": 19644115.0, "reward": 1.7847692966461182, "reward_std": 0.4164116084575653, "rewards/accuracy_reward_step": 0.59765625, "rewards/final_brier_reward_step": 0.7149360775947571, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.8616410493850708, "step": 90 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5034208640125106, "calib/avg_num_step_conf": 5.05859375, "calib/ece": 0.1553174603174603, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0873015873015873, "calib/gap": 0.004256206424708364, "calib/mean_conf": 0.6987301587301588, "calib/mu_c": 0.7004697986577182, "calib/mu_w": 0.6962135922330098, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.1313888888888889, "calib/std_conf": 0.14236095486966704, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1953.0, "completions/max_terminated_length": 1953.0, "completions/mean_length": 542.41015625, "completions/mean_terminated_length": 546.6810913085938, "completions/min_length": 0.0, "completions/min_terminated_length": 257.0, "epoch": 0.09706666666666666, "grad_norm": 0.007123507093638182, "learning_rate": 3.0277777777777776e-06, "loss": 0.0054, "num_tokens": 19890684.0, "reward": 1.752280592918396, "reward_std": 0.5156278014183044, "rewards/accuracy_reward_step": 0.58203125, "rewards/final_brier_reward_step": 0.7117398977279663, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.8520700931549072, "step": 91 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6385819227084, "calib/avg_num_step_conf": 5.05078125, "calib/ece": 0.127392578125, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.1015625, "calib/gap": 0.07632178217821772, "calib/mean_conf": 0.698388671875, "calib/mu_c": 0.7284999999999999, "calib/mu_w": 0.6521782178217822, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.11015625000000001, "calib/std_conf": 0.1656455361932101, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 968.0, "completions/max_terminated_length": 968.0, "completions/mean_length": 472.9296875, "completions/mean_terminated_length": 474.7843322753906, "completions/min_length": 0.0, "completions/min_terminated_length": 225.0, "epoch": 0.09813333333333334, "grad_norm": 0.008139155805110931, "learning_rate": 3e-06, "loss": -0.0132, "num_tokens": 20118474.0, "reward": 1.8182064294815063, "reward_std": 0.4615139067173004, "rewards/accuracy_reward_step": 0.60546875, "rewards/final_brier_reward_step": 0.7615140676498413, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.8784990310668945, "step": 92 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4482542860718308, "calib/avg_num_step_conf": 5.66015625, "calib/ece": 0.26301581027667986, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.17786561264822134, "calib/gap": -0.02694550118883754, "calib/mean_conf": 0.7468102766798418, "calib/mu_c": 0.7338167938931297, "calib/mu_w": 0.7607622950819672, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.24601976284584984, "calib/std_conf": 0.15431212777926867, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2705.0, "completions/max_terminated_length": 2705.0, "completions/mean_length": 551.3984375, "completions/mean_terminated_length": 553.560791015625, "completions/min_length": 0.0, "completions/min_terminated_length": 184.0, "epoch": 0.0992, "grad_norm": 0.006671508774161339, "learning_rate": 2.9722222222222225e-06, "loss": 0.0129, "num_tokens": 20365408.0, "reward": 1.6373653411865234, "reward_std": 0.500267744064331, "rewards/accuracy_reward_step": 0.51171875, "rewards/final_brier_reward_step": 0.6528552174568176, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.8497312068939209, "step": 93 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6566200554295792, "calib/avg_num_step_conf": 5.12109375, "calib/ece": 0.13701960784313733, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.09803921568627451, "calib/gap": 0.08411564625850343, "calib/mean_conf": 0.7134901960784313, "calib/mu_c": 0.7491156462585035, "calib/mu_w": 0.665, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.13701960784313733, "calib/std_conf": 0.1480195234273095, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2389.0, "completions/max_terminated_length": 2389.0, "completions/mean_length": 501.3203125, "completions/mean_terminated_length": 501.3203125, "completions/min_length": 190.0, "completions/min_terminated_length": 190.0, "epoch": 0.10026666666666667, "grad_norm": 0.007690585218369961, "learning_rate": 2.944444444444445e-06, "loss": 0.0537, "num_tokens": 20602426.0, "reward": 1.7664459943771362, "reward_std": 0.451269268989563, "rewards/accuracy_reward_step": 0.57421875, "rewards/final_brier_reward_step": 0.7532836198806763, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.8750002980232239, "step": 94 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5667896917896917, "calib/avg_num_step_conf": 5.07421875, "calib/ece": 0.125321568627451, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.10588235294117647, "calib/gap": 0.03936072261072254, "calib/mean_conf": 0.7037764705882352, "calib/mu_c": 0.7190576923076923, "calib/mu_w": 0.6796969696969698, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.10866666666666666, "calib/std_conf": 0.15970846305646103, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1189.0, "completions/max_terminated_length": 1189.0, "completions/mean_length": 489.39453125, "completions/mean_terminated_length": 491.3137512207031, "completions/min_length": 0.0, "completions/min_terminated_length": 174.0, "epoch": 0.10133333333333333, "grad_norm": 0.007843728177249432, "learning_rate": 2.916666666666667e-06, "loss": 0.0273, "num_tokens": 20833839.0, "reward": 1.8181705474853516, "reward_std": 0.46654027700424194, "rewards/accuracy_reward_step": 0.609375, "rewards/final_brier_reward_step": 0.7442966103553772, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.879948079586029, "step": 95 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.7065189132978031, "calib/avg_num_step_conf": 5.1328125, "calib/ece": 0.1158254901960784, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.10196078431372549, "calib/gap": 0.100928181463668, "calib/mean_conf": 0.7160176470588235, "calib/mu_c": 0.7548057324840763, "calib/mu_w": 0.6538775510204083, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.108078431372549, "calib/std_conf": 0.15414507007931777, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 977.0, "completions/max_terminated_length": 977.0, "completions/mean_length": 466.3515625, "completions/mean_terminated_length": 468.180419921875, "completions/min_length": 0.0, "completions/min_terminated_length": 171.0, "epoch": 0.1024, "grad_norm": 0.008034911006689072, "learning_rate": 2.888888888888889e-06, "loss": 0.0085, "num_tokens": 21059041.0, "reward": 1.829664707183838, "reward_std": 0.4343709945678711, "rewards/accuracy_reward_step": 0.61328125, "rewards/final_brier_reward_step": 0.7742824554443359, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.8725016117095947, "step": 96 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.46652902902902904, "calib/avg_num_step_conf": 5.09375, "calib/ece": 0.18572549019607837, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0784313725490196, "calib/gap": -0.021219969969969887, "calib/mean_conf": 0.7090980392156863, "calib/mu_c": 0.6998611111111112, "calib/mu_w": 0.721081081081081, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.1650588235294117, "calib/std_conf": 0.14351187078070876, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2473.0, "completions/max_terminated_length": 2473.0, "completions/mean_length": 488.76953125, "completions/mean_terminated_length": 488.76953125, "completions/min_length": 172.0, "completions/min_terminated_length": 172.0, "epoch": 0.10346666666666667, "grad_norm": 0.0073645696975290775, "learning_rate": 2.861111111111111e-06, "loss": 0.0151, "num_tokens": 21289238.0, "reward": 1.7355914115905762, "reward_std": 0.4348849058151245, "rewards/accuracy_reward_step": 0.5625, "rewards/final_brier_reward_step": 0.6995663642883301, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.8756111860275269, "step": 97 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.574774416135881, "calib/avg_num_step_conf": 4.91015625, "calib/ece": 0.11916996047430833, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.09486166007905138, "calib/gap": 0.0338010881104035, "calib/mean_conf": 0.6954545454545455, "calib/mu_c": 0.7082802547770701, "calib/mu_w": 0.6744791666666666, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0970355731225297, "calib/std_conf": 0.15102904894616762, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2311.0, "completions/max_terminated_length": 2311.0, "completions/mean_length": 496.5703125, "completions/mean_terminated_length": 496.5703125, "completions/min_length": 183.0, "completions/min_terminated_length": 183.0, "epoch": 0.10453333333333334, "grad_norm": 0.007983372546732426, "learning_rate": 2.8333333333333335e-06, "loss": 0.0391, "num_tokens": 21522544.0, "reward": 1.8186938762664795, "reward_std": 0.41583630442619324, "rewards/accuracy_reward_step": 0.61328125, "rewards/final_brier_reward_step": 0.7432183623313904, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.8753073215484619, "step": 98 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5388136942675158, "calib/avg_num_step_conf": 5.1015625, "calib/ece": 0.3299209486166008, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.11857707509881422, "calib/gap": 0.02430135350318463, "calib/mean_conf": 0.6964822134387351, "calib/mu_c": 0.7115625, "calib/mu_w": 0.6872611464968154, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3234782608695652, "calib/std_conf": 0.16562934439031005, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2836.0, "completions/max_terminated_length": 2836.0, "completions/mean_length": 557.22265625, "completions/mean_terminated_length": 559.4078979492188, "completions/min_length": 0.0, "completions/min_terminated_length": 183.0, "epoch": 0.1056, "grad_norm": 0.006918633822351694, "learning_rate": 2.805555555555556e-06, "loss": 0.0349, "num_tokens": 21770993.0, "reward": 1.4331088066101074, "reward_std": 0.43188124895095825, "rewards/accuracy_reward_step": 0.375, "rewards/final_brier_reward_step": 0.6404386758804321, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.8654338121414185, "step": 99 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.63125, "calib/avg_num_step_conf": 4.76171875, "calib/ece": 0.1452734375000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.05859375, "calib/gap": 0.06177586206896557, "calib/mean_conf": 0.6855078125, "calib/mu_c": 0.7135000000000001, "calib/mu_w": 0.6517241379310346, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.1419531250000001, "calib/std_conf": 0.1442292823127982, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1305.0, "completions/max_terminated_length": 1305.0, "completions/mean_length": 484.73828125, "completions/mean_terminated_length": 486.6392517089844, "completions/min_length": 0.0, "completions/min_terminated_length": 193.0, "epoch": 0.10666666666666667, "grad_norm": 0.0076471734791994095, "learning_rate": 2.7777777777777783e-06, "loss": 0.0295, "num_tokens": 22002494.0, "reward": 1.7281349897384644, "reward_std": 0.4057810306549072, "rewards/accuracy_reward_step": 0.546875, "rewards/final_brier_reward_step": 0.7427926063537598, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.8884974718093872, "step": 100 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.6494941086065573, "calib/avg_num_step_conf": 4.91015625, "calib/ece": 0.18779000000000007, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.076, "calib/gap": 0.07791880122950812, "calib/mean_conf": 0.6602899999999999, "calib/mu_c": 0.7001844262295082, "calib/mu_w": 0.622265625, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.1800400000000001, "calib/std_conf": 0.16406870786350453, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2651.0, "completions/max_terminated_length": 2651.0, "completions/mean_length": 545.35546875, "completions/mean_terminated_length": 545.35546875, "completions/min_length": 189.0, "completions/min_terminated_length": 189.0, "epoch": 0.10773333333333333, "grad_norm": 0.006997666321694851, "learning_rate": 2.7500000000000004e-06, "loss": 0.0487, "num_tokens": 22249097.0, "reward": 1.6012694835662842, "reward_std": 0.49606239795684814, "rewards/accuracy_reward_step": 0.48046875, "rewards/final_brier_reward_step": 0.7136021256446838, "rewards/format_reward_step": 0.97265625, "rewards/stepwise_brier_reward": 0.8633507490158081, "step": 101 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5767975988516247, "calib/avg_num_step_conf": 4.76171875, "calib/ece": 0.07235294117647073, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.047058823529411764, "calib/gap": 0.03954391230588539, "calib/mean_conf": 0.6566666666666666, "calib/mu_c": 0.6717088607594937, "calib/mu_w": 0.6321649484536083, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.05470588235294127, "calib/std_conf": 0.15315334704934885, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2341.0, "completions/max_terminated_length": 2341.0, "completions/mean_length": 434.3828125, "completions/mean_terminated_length": 434.3828125, "completions/min_length": 196.0, "completions/min_terminated_length": 196.0, "epoch": 0.1088, "grad_norm": 0.007858687080442905, "learning_rate": 2.7222222222222224e-06, "loss": 0.0249, "num_tokens": 22466995.0, "reward": 1.8360176086425781, "reward_std": 0.39576029777526855, "rewards/accuracy_reward_step": 0.6171875, "rewards/final_brier_reward_step": 0.7551559209823608, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.8936017155647278, "step": 102 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6310987322320399, "calib/avg_num_step_conf": 4.62109375, "calib/ece": 0.1661354581673307, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.06374501992031872, "calib/gap": 0.06998271225509023, "calib/mean_conf": 0.6900398406374502, "calib/mu_c": 0.7218248175182482, "calib/mu_w": 0.651842105263158, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.15517928286852592, "calib/std_conf": 0.15271904263203304, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2632.0, "completions/max_terminated_length": 2632.0, "completions/mean_length": 533.00390625, "completions/mean_terminated_length": 535.0941772460938, "completions/min_length": 0.0, "completions/min_terminated_length": 164.0, "epoch": 0.10986666666666667, "grad_norm": 0.006842796690762043, "learning_rate": 2.6944444444444444e-06, "loss": 0.0317, "num_tokens": 22707996.0, "reward": 1.700600028038025, "reward_std": 0.4354501962661743, "rewards/accuracy_reward_step": 0.54296875, "rewards/final_brier_reward_step": 0.7264589667320251, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.8650033473968506, "step": 103 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.626170238975117, "calib/avg_num_step_conf": 4.49609375, "calib/ece": 0.18858823529411767, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.023529411764705882, "calib/gap": 0.04906873614190699, "calib/mean_conf": 0.6709411764705883, "calib/mu_c": 0.6963414634146342, "calib/mu_w": 0.6472727272727272, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.18858823529411767, "calib/std_conf": 0.12849008686332702, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1251.0, "completions/max_terminated_length": 1251.0, "completions/mean_length": 468.984375, "completions/mean_terminated_length": 470.82354736328125, "completions/min_length": 0.0, "completions/min_terminated_length": 198.0, "epoch": 0.11093333333333333, "grad_norm": 0.007150310557335615, "learning_rate": 2.666666666666667e-06, "loss": 0.0356, "num_tokens": 22934736.0, "reward": 1.622341275215149, "reward_std": 0.35085827112197876, "rewards/accuracy_reward_step": 0.48046875, "rewards/final_brier_reward_step": 0.719916820526123, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.8944482803344727, "step": 104 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5058509612293449, "calib/avg_num_step_conf": 4.7890625, "calib/ece": 0.15303149606299216, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.07086614173228346, "calib/gap": 0.0013418633061145213, "calib/mean_conf": 0.6801181102362205, "calib/mu_c": 0.6806622516556291, "calib/mu_w": 0.6793203883495146, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.11933070866141737, "calib/std_conf": 0.15448793313085318, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2178.0, "completions/max_terminated_length": 2178.0, "completions/mean_length": 506.734375, "completions/mean_terminated_length": 508.7215881347656, "completions/min_length": 0.0, "completions/min_terminated_length": 211.0, "epoch": 0.112, "grad_norm": 0.007065868470817804, "learning_rate": 2.6388888888888893e-06, "loss": 0.0065, "num_tokens": 23170220.0, "reward": 1.7828137874603271, "reward_std": 0.4391106963157654, "rewards/accuracy_reward_step": 0.58984375, "rewards/final_brier_reward_step": 0.7226855158805847, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8851315975189209, "step": 105 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5881953867028494, "calib/avg_num_step_conf": 4.48828125, "calib/ece": 0.16862745098039222, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.03529411764705882, "calib/gap": 0.04682681633156538, "calib/mean_conf": 0.6803921568627451, "calib/mu_c": 0.7026119402985075, "calib/mu_w": 0.6557851239669421, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.161764705882353, "calib/std_conf": 0.13174783972353601, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1451.0, "completions/max_terminated_length": 1451.0, "completions/mean_length": 461.94921875, "completions/mean_terminated_length": 463.76080322265625, "completions/min_length": 0.0, "completions/min_terminated_length": 209.0, "epoch": 0.11306666666666666, "grad_norm": 0.007637795992195606, "learning_rate": 2.6111111111111113e-06, "loss": 0.0118, "num_tokens": 23393063.0, "reward": 1.6857733726501465, "reward_std": 0.3476555049419403, "rewards/accuracy_reward_step": 0.5234375, "rewards/final_brier_reward_step": 0.7268586158752441, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8912352919578552, "step": 106 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4945313002637844, "calib/avg_num_step_conf": 4.5703125, "calib/ece": 0.16032421875, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.06640625, "calib/gap": -0.008823972206137842, "calib/mean_conf": 0.6870429687499999, "calib/mu_c": 0.6836305732484077, "calib/mu_w": 0.6924545454545455, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.11704296874999999, "calib/std_conf": 0.14601597899694582, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1291.0, "completions/max_terminated_length": 1291.0, "completions/mean_length": 458.0703125, "completions/mean_terminated_length": 459.86669921875, "completions/min_length": 0.0, "completions/min_terminated_length": 155.0, "epoch": 0.11413333333333334, "grad_norm": 0.008233068510890007, "learning_rate": 2.5833333333333337e-06, "loss": 0.0101, "num_tokens": 23614945.0, "reward": 1.8233089447021484, "reward_std": 0.4427195191383362, "rewards/accuracy_reward_step": 0.61328125, "rewards/final_brier_reward_step": 0.7291610240936279, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.8921996355056763, "step": 107 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5618535655960806, "calib/avg_num_step_conf": 4.6640625, "calib/ece": 0.10795686274509793, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.050980392156862744, "calib/gap": 0.024407185628742556, "calib/mean_conf": 0.7209843137254901, "calib/mu_c": 0.7294071856287425, "calib/mu_w": 0.705, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.08701960784313714, "calib/std_conf": 0.13332304770015738, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1583.0, "completions/max_terminated_length": 1583.0, "completions/mean_length": 473.65234375, "completions/mean_terminated_length": 475.50982666015625, "completions/min_length": 0.0, "completions/min_terminated_length": 162.0, "epoch": 0.1152, "grad_norm": 0.006711322348564863, "learning_rate": 2.5555555555555557e-06, "loss": 0.0047, "num_tokens": 23839432.0, "reward": 1.8879101276397705, "reward_std": 0.43467026948928833, "rewards/accuracy_reward_step": 0.65234375, "rewards/final_brier_reward_step": 0.7599049806594849, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.8854851722717285, "step": 108 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.536313548710243, "calib/avg_num_step_conf": 4.8515625, "calib/ece": 0.20652173913043487, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.08695652173913043, "calib/gap": 0.019758953168044102, "calib/mean_conf": 0.7195652173913044, "calib/mu_c": 0.7290151515151516, "calib/mu_w": 0.7092561983471075, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.20217391304347834, "calib/std_conf": 0.14194933369207718, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2365.0, "completions/max_terminated_length": 2365.0, "completions/mean_length": 503.6015625, "completions/mean_terminated_length": 505.5765075683594, "completions/min_length": 0.0, "completions/min_terminated_length": 207.0, "epoch": 0.11626666666666667, "grad_norm": 0.007117453962564468, "learning_rate": 2.5277777777777778e-06, "loss": 0.0267, "num_tokens": 24072954.0, "reward": 1.6607688665390015, "reward_std": 0.3406231999397278, "rewards/accuracy_reward_step": 0.515625, "rewards/final_brier_reward_step": 0.6928331851959229, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.8799298405647278, "step": 109 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4970926636149944, "calib/avg_num_step_conf": 4.3828125, "calib/ece": 0.19239215686274508, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.047058823529411764, "calib/gap": -0.001516763577879332, "calib/mean_conf": 0.7174901960784315, "calib/mu_c": 0.7167883211678833, "calib/mu_w": 0.7183050847457626, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.1863137254901961, "calib/std_conf": 0.1232245475527072, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1399.0, "completions/max_terminated_length": 1399.0, "completions/mean_length": 449.171875, "completions/mean_terminated_length": 450.933349609375, "completions/min_length": 0.0, "completions/min_terminated_length": 167.0, "epoch": 0.11733333333333333, "grad_norm": 0.007710859179496765, "learning_rate": 2.5e-06, "loss": -0.0073, "num_tokens": 24292862.0, "reward": 1.7000811100006104, "reward_std": 0.5412262678146362, "rewards/accuracy_reward_step": 0.53515625, "rewards/final_brier_reward_step": 0.7002187371253967, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.896980881690979, "step": 110 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5235912893700787, "calib/avg_num_step_conf": 4.56640625, "calib/ece": 0.21290196078431373, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.03137254901960784, "calib/gap": 0.014279035433070741, "calib/mean_conf": 0.7105490196078431, "calib/mu_c": 0.7177165354330707, "calib/mu_w": 0.7034374999999999, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2127058823529412, "calib/std_conf": 0.12745353843586749, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2903.0, "completions/max_terminated_length": 2903.0, "completions/mean_length": 495.90625, "completions/mean_terminated_length": 495.90625, "completions/min_length": 167.0, "completions/min_terminated_length": 167.0, "epoch": 0.1184, "grad_norm": 0.007386922836303711, "learning_rate": 2.4722222222222226e-06, "loss": 0.0464, "num_tokens": 24527222.0, "reward": 1.637666940689087, "reward_std": 0.4339905083179474, "rewards/accuracy_reward_step": 0.49609375, "rewards/final_brier_reward_step": 0.6930207014083862, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.8888974189758301, "step": 111 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6325248756218905, "calib/avg_num_step_conf": 4.5390625, "calib/ece": 0.16582677165354331, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.051181102362204724, "calib/gap": 0.06633333333333347, "calib/mean_conf": 0.6886614173228346, "calib/mu_c": 0.7200000000000001, "calib/mu_w": 0.6536666666666666, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.16346456692913386, "calib/std_conf": 0.14771947346943018, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1087.0, "completions/max_terminated_length": 1087.0, "completions/mean_length": 472.19140625, "completions/mean_terminated_length": 475.9094543457031, "completions/min_length": 0.0, "completions/min_terminated_length": 178.0, "epoch": 0.11946666666666667, "grad_norm": 0.007241616956889629, "learning_rate": 2.4444444444444447e-06, "loss": -0.0253, "num_tokens": 24756023.0, "reward": 1.6869521141052246, "reward_std": 0.32682478427886963, "rewards/accuracy_reward_step": 0.5234375, "rewards/final_brier_reward_step": 0.7303000688552856, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8925088047981262, "step": 112 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6635638297872339, "calib/avg_num_step_conf": 4.36328125, "calib/ece": 0.15328063241106726, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.03162055335968379, "calib/gap": 0.05764057750759888, "calib/mean_conf": 0.7098023715415019, "calib/mu_c": 0.7353191489361702, "calib/mu_w": 0.6776785714285714, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.15288537549407122, "calib/std_conf": 0.10540511193881581, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2571.0, "completions/max_terminated_length": 2571.0, "completions/mean_length": 432.546875, "completions/mean_terminated_length": 434.2431640625, "completions/min_length": 0.0, "completions/min_terminated_length": 164.0, "epoch": 0.12053333333333334, "grad_norm": 0.007901332341134548, "learning_rate": 2.4166666666666667e-06, "loss": 0.0609, "num_tokens": 24971955.0, "reward": 1.7260768413543701, "reward_std": 0.5581738352775574, "rewards/accuracy_reward_step": 0.55078125, "rewards/final_brier_reward_step": 0.7347862720489502, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.8960837721824646, "step": 113 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.52540299818102, "calib/avg_num_step_conf": 4.76171875, "calib/ece": 0.15988281249999997, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0390625, "calib/gap": 0.022055447531832084, "calib/mean_conf": 0.7181640624999999, "calib/mu_c": 0.7273825503355704, "calib/mu_w": 0.7053271028037383, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.1480078125, "calib/std_conf": 0.1211893712686723, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1642.0, "completions/max_terminated_length": 1642.0, "completions/mean_length": 451.1015625, "completions/mean_terminated_length": 452.87060546875, "completions/min_length": 0.0, "completions/min_terminated_length": 169.0, "epoch": 0.1216, "grad_norm": 0.00733104208484292, "learning_rate": 2.388888888888889e-06, "loss": 0.0409, "num_tokens": 25192461.0, "reward": 1.7813656330108643, "reward_std": 0.3606231212615967, "rewards/accuracy_reward_step": 0.58203125, "rewards/final_brier_reward_step": 0.7342410087585449, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.899033784866333, "step": 114 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5454853412170485, "calib/avg_num_step_conf": 4.5234375, "calib/ece": 0.2209411764705882, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0392156862745098, "calib/gap": 0.021579822616408117, "calib/mean_conf": 0.7146666666666666, "calib/mu_c": 0.7250757575757576, "calib/mu_w": 0.7034959349593495, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2089803921568627, "calib/std_conf": 0.12425695493481724, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2858.0, "completions/max_terminated_length": 2858.0, "completions/mean_length": 441.8359375, "completions/mean_terminated_length": 441.8359375, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 0.12266666666666666, "grad_norm": 0.008461576886475086, "learning_rate": 2.361111111111111e-06, "loss": 0.0394, "num_tokens": 25410835.0, "reward": 1.6725574731826782, "reward_std": 0.4955918788909912, "rewards/accuracy_reward_step": 0.515625, "rewards/final_brier_reward_step": 0.7040703296661377, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9002221822738647, "step": 115 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5469261778785588, "calib/avg_num_step_conf": 4.5390625, "calib/ece": 0.1417254901960785, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.047058823529411764, "calib/gap": 0.018904006046863375, "calib/mean_conf": 0.7134901960784313, "calib/mu_c": 0.7214965986394558, "calib/mu_w": 0.7025925925925924, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.1393725490196079, "calib/std_conf": 0.12002930566161171, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2441.0, "completions/max_terminated_length": 2441.0, "completions/mean_length": 496.7265625, "completions/mean_terminated_length": 496.7265625, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 0.12373333333333333, "grad_norm": 0.007390998303890228, "learning_rate": 2.3333333333333336e-06, "loss": 0.0516, "num_tokens": 25642517.0, "reward": 1.7620733976364136, "reward_std": 0.4782768189907074, "rewards/accuracy_reward_step": 0.57421875, "rewards/final_brier_reward_step": 0.7265383005142212, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8920679092407227, "step": 116 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.453277587890625, "calib/avg_num_step_conf": 4.71484375, "calib/ece": 0.23218749999999996, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0234375, "calib/gap": -0.016875000000000195, "calib/mean_conf": 0.71875, "calib/mu_c": 0.7103124999999999, "calib/mu_w": 0.7271875000000001, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.22546874999999997, "calib/std_conf": 0.11779218989389748, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1274.0, "completions/max_terminated_length": 1274.0, "completions/mean_length": 459.79296875, "completions/mean_terminated_length": 461.5960998535156, "completions/min_length": 0.0, "completions/min_terminated_length": 199.0, "epoch": 0.1248, "grad_norm": 0.007319636642932892, "learning_rate": 2.305555555555556e-06, "loss": 0.0099, "num_tokens": 25866824.0, "reward": 1.637864351272583, "reward_std": 0.47429442405700684, "rewards/accuracy_reward_step": 0.5, "rewards/final_brier_reward_step": 0.6729999780654907, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8940824270248413, "step": 117 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5011826953003423, "calib/avg_num_step_conf": 4.859375, "calib/ece": 0.19444881889763782, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.05905511811023622, "calib/gap": 0.008971677559913016, "calib/mean_conf": 0.7259448818897637, "calib/mu_c": 0.7301481481481482, "calib/mu_w": 0.7211764705882352, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.19444881889763782, "calib/std_conf": 0.13003214666657753, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2393.0, "completions/max_terminated_length": 2393.0, "completions/mean_length": 466.37890625, "completions/mean_terminated_length": 468.2078857421875, "completions/min_length": 0.0, "completions/min_terminated_length": 167.0, "epoch": 0.12586666666666665, "grad_norm": 0.007337419781833887, "learning_rate": 2.277777777777778e-06, "loss": 0.0379, "num_tokens": 26090225.0, "reward": 1.6856483221054077, "reward_std": 0.3241842985153198, "rewards/accuracy_reward_step": 0.52734375, "rewards/final_brier_reward_step": 0.695266842842102, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8988887071609497, "step": 118 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5068715642178911, "calib/avg_num_step_conf": 4.6171875, "calib/ece": 0.15708661417322833, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.023622047244094488, "calib/gap": 0.00229010494752635, "calib/mean_conf": 0.6996062992125984, "calib/mu_c": 0.7006521739130435, "calib/mu_w": 0.6983620689655171, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.15669291338582675, "calib/std_conf": 0.11311899925218569, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1358.0, "completions/max_terminated_length": 1358.0, "completions/mean_length": 512.45703125, "completions/mean_terminated_length": 514.4666748046875, "completions/min_length": 0.0, "completions/min_terminated_length": 141.0, "epoch": 0.12693333333333334, "grad_norm": 0.007050854153931141, "learning_rate": 2.25e-06, "loss": 0.0115, "num_tokens": 26326478.0, "reward": 1.7035629749298096, "reward_std": 0.5102789402008057, "rewards/accuracy_reward_step": 0.5390625, "rewards/final_brier_reward_step": 0.7076945304870605, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.8956202268600464, "step": 119 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5821460527221419, "calib/avg_num_step_conf": 4.53515625, "calib/ece": 0.07598425196850389, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.015748031496062992, "calib/gap": 0.04384885401610572, "calib/mean_conf": 0.6977952755905512, "calib/mu_c": 0.712814371257485, "calib/mu_w": 0.6689655172413793, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.05814960629921259, "calib/std_conf": 0.12801275395260564, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1062.0, "completions/max_terminated_length": 1062.0, "completions/mean_length": 448.9453125, "completions/mean_terminated_length": 452.4803161621094, "completions/min_length": 0.0, "completions/min_terminated_length": 183.0, "epoch": 0.128, "grad_norm": 0.007677591405808926, "learning_rate": 2.222222222222222e-06, "loss": 0.0064, "num_tokens": 26548096.0, "reward": 1.890402913093567, "reward_std": 0.40941351652145386, "rewards/accuracy_reward_step": 0.65234375, "rewards/final_brier_reward_step": 0.767189085483551, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.9037976861000061, "step": 120 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4778687515390298, "calib/avg_num_step_conf": 4.74609375, "calib/ece": 0.23031372549019608, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.03137254901960784, "calib/gap": -0.007226052696380103, "calib/mean_conf": 0.6958039215686275, "calib/mu_c": 0.6922900763358778, "calib/mu_w": 0.6995161290322579, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.20619607843137255, "calib/std_conf": 0.12692764954853383, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2383.0, "completions/max_terminated_length": 2383.0, "completions/mean_length": 502.6171875, "completions/mean_terminated_length": 502.6171875, "completions/min_length": 190.0, "completions/min_terminated_length": 190.0, "epoch": 0.12906666666666666, "grad_norm": 0.0072189816273748875, "learning_rate": 2.1944444444444445e-06, "loss": 0.0204, "num_tokens": 26781822.0, "reward": 1.6658300161361694, "reward_std": 0.4628363847732544, "rewards/accuracy_reward_step": 0.51171875, "rewards/final_brier_reward_step": 0.6945909857749939, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9062290191650391, "step": 121 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6797278121775026, "calib/avg_num_step_conf": 4.54296875, "calib/ece": 0.0907086614173228, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.01968503937007874, "calib/gap": 0.0847265221878224, "calib/mean_conf": 0.6891338582677166, "calib/mu_c": 0.723157894736842, "calib/mu_w": 0.6384313725490196, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0907086614173228, "calib/std_conf": 0.12751423340066567, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2624.0, "completions/max_terminated_length": 2624.0, "completions/mean_length": 472.09375, "completions/mean_terminated_length": 473.94512939453125, "completions/min_length": 0.0, "completions/min_terminated_length": 210.0, "epoch": 0.13013333333333332, "grad_norm": 0.007661870680749416, "learning_rate": 2.166666666666667e-06, "loss": 0.0113, "num_tokens": 27010022.0, "reward": 1.8057467937469482, "reward_std": 0.43043383955955505, "rewards/accuracy_reward_step": 0.59375, "rewards/final_brier_reward_step": 0.7698593735694885, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9062525033950806, "step": 122 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5482461538461539, "calib/avg_num_step_conf": 4.3515625, "calib/ece": 0.20866666666666667, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0196078431372549, "calib/gap": 0.01661538461538481, "calib/mean_conf": 0.7184705882352941, "calib/mu_c": 0.7266153846153847, "calib/mu_w": 0.7099999999999999, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.20866666666666667, "calib/std_conf": 0.10755579545030615, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1879.0, "completions/max_terminated_length": 1879.0, "completions/mean_length": 501.75390625, "completions/mean_terminated_length": 501.75390625, "completions/min_length": 242.0, "completions/min_terminated_length": 242.0, "epoch": 0.1312, "grad_norm": 0.0073029338382184505, "learning_rate": 2.138888888888889e-06, "loss": 0.0064, "num_tokens": 27243759.0, "reward": 1.645280122756958, "reward_std": 0.46035170555114746, "rewards/accuracy_reward_step": 0.5078125, "rewards/final_brier_reward_step": 0.6940886974334717, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.8870315551757812, "step": 123 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5546541132478631, "calib/avg_num_step_conf": 4.45703125, "calib/ece": 0.13226190476190472, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.011904761904761904, "calib/gap": 0.01400641025641014, "calib/mean_conf": 0.7140873015873015, "calib/mu_c": 0.7194230769230768, "calib/mu_w": 0.7054166666666667, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.11365079365079361, "calib/std_conf": 0.10809846480278325, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1067.0, "completions/max_terminated_length": 1067.0, "completions/mean_length": 476.91015625, "completions/mean_terminated_length": 478.7804260253906, "completions/min_length": 0.0, "completions/min_terminated_length": 155.0, "epoch": 0.13226666666666667, "grad_norm": 0.007507139816880226, "learning_rate": 2.1111111111111114e-06, "loss": -0.0072, "num_tokens": 27472664.0, "reward": 1.8107237815856934, "reward_std": 0.401524156332016, "rewards/accuracy_reward_step": 0.609375, "rewards/final_brier_reward_step": 0.7354112863540649, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.8902963399887085, "step": 124 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4849450274862569, "calib/avg_num_step_conf": 4.453125, "calib/ece": 0.22370078740157487, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.03937007874015748, "calib/gap": -0.007656171914042886, "calib/mean_conf": 0.7120472440944883, "calib/mu_c": 0.7085507246376812, "calib/mu_w": 0.7162068965517241, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.19622047244094498, "calib/std_conf": 0.11846683699999311, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2106.0, "completions/max_terminated_length": 2106.0, "completions/mean_length": 504.73046875, "completions/mean_terminated_length": 504.73046875, "completions/min_length": 188.0, "completions/min_terminated_length": 188.0, "epoch": 0.13333333333333333, "grad_norm": 0.006698730401694775, "learning_rate": 2.0833333333333334e-06, "loss": 0.0289, "num_tokens": 27706683.0, "reward": 1.7050342559814453, "reward_std": 0.5200086832046509, "rewards/accuracy_reward_step": 0.5390625, "rewards/final_brier_reward_step": 0.700056254863739, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.901330828666687, "step": 125 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6516716754320061, "calib/avg_num_step_conf": 4.4921875, "calib/ece": 0.19229249011857708, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.05138339920948617, "calib/gap": 0.06253443526170799, "calib/mean_conf": 0.7140316205533596, "calib/mu_c": 0.743939393939394, "calib/mu_w": 0.681404958677686, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.19229249011857708, "calib/std_conf": 0.12270804302194291, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2901.0, "completions/max_terminated_length": 2901.0, "completions/mean_length": 479.03515625, "completions/mean_terminated_length": 480.91375732421875, "completions/min_length": 0.0, "completions/min_terminated_length": 167.0, "epoch": 0.1344, "grad_norm": 0.006964969914406538, "learning_rate": 2.0555555555555555e-06, "loss": 0.0102, "num_tokens": 27934780.0, "reward": 1.6716426610946655, "reward_std": 0.4074101448059082, "rewards/accuracy_reward_step": 0.51953125, "rewards/final_brier_reward_step": 0.7178597450256348, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.8905860185623169, "step": 126 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5591517857142858, "calib/avg_num_step_conf": 4.37109375, "calib/ece": 0.18771653543307087, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.011811023622047244, "calib/gap": 0.020229414682539915, "calib/mean_conf": 0.675511811023622, "calib/mu_c": 0.685546875, "calib/mu_w": 0.6653174603174601, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.17964566929133857, "calib/std_conf": 0.12615893401639564, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2581.0, "completions/max_terminated_length": 2581.0, "completions/mean_length": 468.953125, "completions/mean_terminated_length": 468.953125, "completions/min_length": 182.0, "completions/min_terminated_length": 182.0, "epoch": 0.13546666666666668, "grad_norm": 0.00796822365373373, "learning_rate": 2.027777777777778e-06, "loss": 0.0523, "num_tokens": 28158504.0, "reward": 1.647589087486267, "reward_std": 0.4065118432044983, "rewards/accuracy_reward_step": 0.5, "rewards/final_brier_reward_step": 0.7053730487823486, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.908420741558075, "step": 127 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.603496062992126, "calib/avg_num_step_conf": 4.27734375, "calib/ece": 0.14055555555555554, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.04899653543307103, "calib/mean_conf": 0.6365873015873017, "calib/mu_c": 0.66128, "calib/mu_w": 0.612283464566929, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.14055555555555554, "calib/std_conf": 0.13243844151578976, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2386.0, "completions/max_terminated_length": 2386.0, "completions/mean_length": 498.8359375, "completions/mean_terminated_length": 502.7637634277344, "completions/min_length": 0.0, "completions/min_terminated_length": 184.0, "epoch": 0.13653333333333334, "grad_norm": 0.007659838069230318, "learning_rate": 2.0000000000000003e-06, "loss": -0.017, "num_tokens": 28392870.0, "reward": 1.639002799987793, "reward_std": 0.5024175643920898, "rewards/accuracy_reward_step": 0.4921875, "rewards/final_brier_reward_step": 0.7256976366043091, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.9084383845329285, "step": 128 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5661844025636146, "calib/avg_num_step_conf": 4.6015625, "calib/ece": 0.100234375, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0390625, "calib/gap": 0.03288533536391913, "calib/mean_conf": 0.690625, "calib/mu_c": 0.7038562091503268, "calib/mu_w": 0.6709708737864076, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.09660156250000002, "calib/std_conf": 0.13231561463032246, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 999.0, "completions/max_terminated_length": 999.0, "completions/mean_length": 441.421875, "completions/mean_terminated_length": 443.1529541015625, "completions/min_length": 0.0, "completions/min_terminated_length": 182.0, "epoch": 0.1376, "grad_norm": 0.007797915954142809, "learning_rate": 1.9722222222222224e-06, "loss": 0.0022, "num_tokens": 28608258.0, "reward": 1.8130768537521362, "reward_std": 0.4100300967693329, "rewards/accuracy_reward_step": 0.59765625, "rewards/final_brier_reward_step": 0.7492015361785889, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.917168378829956, "step": 129 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5470162748643761, "calib/avg_num_step_conf": 4.3203125, "calib/ece": 0.13023437499999987, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.01171875, "calib/gap": 0.03660036166365288, "calib/mean_conf": 0.6768749999999999, "calib/mu_c": 0.690886075949367, "calib/mu_w": 0.6542857142857141, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.09496093749999993, "calib/std_conf": 0.1444669620536128, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 983.0, "completions/max_terminated_length": 983.0, "completions/mean_length": 440.83203125, "completions/mean_terminated_length": 442.5608215332031, "completions/min_length": 0.0, "completions/min_terminated_length": 151.0, "epoch": 0.13866666666666666, "grad_norm": 0.008532690815627575, "learning_rate": 1.944444444444445e-06, "loss": 0.0038, "num_tokens": 28826399.0, "reward": 1.841002106666565, "reward_std": 0.30434200167655945, "rewards/accuracy_reward_step": 0.6171875, "rewards/final_brier_reward_step": 0.7529324293136597, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9157634973526001, "step": 130 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5894105894105894, "calib/avg_num_step_conf": 4.2890625, "calib/ece": 0.25133333333333335, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.011764705882352941, "calib/gap": 0.04025786713286705, "calib/mean_conf": 0.690549019607843, "calib/mu_c": 0.7131249999999999, "calib/mu_w": 0.6728671328671328, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.25133333333333335, "calib/std_conf": 0.11641917396918036, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2895.0, "completions/max_terminated_length": 2895.0, "completions/mean_length": 454.296875, "completions/mean_terminated_length": 454.296875, "completions/min_length": 226.0, "completions/min_terminated_length": 226.0, "epoch": 0.13973333333333332, "grad_norm": 0.007329374551773071, "learning_rate": 1.916666666666667e-06, "loss": 0.0185, "num_tokens": 29048907.0, "reward": 1.5587986707687378, "reward_std": 0.3782535791397095, "rewards/accuracy_reward_step": 0.4375, "rewards/final_brier_reward_step": 0.6940823793411255, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9239246845245361, "step": 131 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5577631578947367, "calib/avg_num_step_conf": 4.4765625, "calib/ece": 0.08654901960784306, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.00784313725490196, "calib/gap": 0.028427631578947454, "calib/mean_conf": 0.7067843137254902, "calib/mu_c": 0.717375, "calib/mu_w": 0.6889473684210525, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.08294117647058816, "calib/std_conf": 0.11680993674584471, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1294.0, "completions/max_terminated_length": 1294.0, "completions/mean_length": 475.59375, "completions/mean_terminated_length": 477.4588623046875, "completions/min_length": 0.0, "completions/min_terminated_length": 192.0, "epoch": 0.1408, "grad_norm": 0.0073455991223454475, "learning_rate": 1.888888888888889e-06, "loss": 0.025, "num_tokens": 29276251.0, "reward": 1.8585619926452637, "reward_std": 0.4749313294887543, "rewards/accuracy_reward_step": 0.62890625, "rewards/final_brier_reward_step": 0.7566285133361816, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9119948744773865, "step": 132 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6084105441733003, "calib/avg_num_step_conf": 4.28125, "calib/ece": 0.33207843137254894, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0196078431372549, "calib/gap": 0.04260015659663319, "calib/mean_conf": 0.7124705882352942, "calib/mu_c": 0.7388659793814433, "calib/mu_w": 0.6962658227848101, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.33207843137254894, "calib/std_conf": 0.11152672289304398, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1927.0, "completions/max_terminated_length": 1927.0, "completions/mean_length": 513.99609375, "completions/mean_terminated_length": 513.99609375, "completions/min_length": 178.0, "completions/min_terminated_length": 178.0, "epoch": 0.14186666666666667, "grad_norm": 0.007127861492335796, "learning_rate": 1.8611111111111113e-06, "loss": 0.0471, "num_tokens": 29514178.0, "reward": 1.45475435256958, "reward_std": 0.47963637113571167, "rewards/accuracy_reward_step": 0.37890625, "rewards/final_brier_reward_step": 0.6553382873535156, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9058670401573181, "step": 133 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5925682444277486, "calib/avg_num_step_conf": 4.0078125, "calib/ece": 0.2086166007905139, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.015810276679841896, "calib/gap": 0.03940771349862249, "calib/mean_conf": 0.6868774703557312, "calib/mu_c": 0.7074380165289256, "calib/mu_w": 0.6680303030303031, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.2086166007905139, "calib/std_conf": 0.13051181100794376, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2451.0, "completions/max_terminated_length": 2451.0, "completions/mean_length": 543.16015625, "completions/mean_terminated_length": 543.16015625, "completions/min_length": 214.0, "completions/min_terminated_length": 214.0, "epoch": 0.14293333333333333, "grad_norm": 0.006815467029809952, "learning_rate": 1.8333333333333333e-06, "loss": 0.0443, "num_tokens": 29762179.0, "reward": 1.5914236307144165, "reward_std": 0.5106499195098877, "rewards/accuracy_reward_step": 0.47265625, "rewards/final_brier_reward_step": 0.6931988596916199, "rewards/format_reward_step": 0.97265625, "rewards/stepwise_brier_reward": 0.8912457227706909, "step": 134 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5301959325396826, "calib/avg_num_step_conf": 4.05078125, "calib/ece": 0.14402343750000005, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.015625, "calib/gap": 0.011081349206349178, "calib/mean_conf": 0.6891796875, "calib/mu_c": 0.6940277777777778, "calib/mu_w": 0.6829464285714286, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.13535156250000002, "calib/std_conf": 0.11849195209972002, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1428.0, "completions/max_terminated_length": 1428.0, "completions/mean_length": 493.9609375, "completions/mean_terminated_length": 495.8980712890625, "completions/min_length": 0.0, "completions/min_terminated_length": 210.0, "epoch": 0.144, "grad_norm": 0.007231003139168024, "learning_rate": 1.8055555555555557e-06, "loss": 0.0076, "num_tokens": 29994513.0, "reward": 1.7483104467391968, "reward_std": 0.5153388977050781, "rewards/accuracy_reward_step": 0.5625, "rewards/final_brier_reward_step": 0.7237054705619812, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9101613163948059, "step": 135 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5537567968363816, "calib/avg_num_step_conf": 4.2578125, "calib/ece": 0.24686274509803918, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.047058823529411764, "calib/gap": 0.02223739495798316, "calib/mean_conf": 0.7066274509803921, "calib/mu_c": 0.7184873949579832, "calib/mu_w": 0.69625, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.24341176470588233, "calib/std_conf": 0.1304366664413271, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 995.0, "completions/max_terminated_length": 995.0, "completions/mean_length": 454.2578125, "completions/mean_terminated_length": 456.03924560546875, "completions/min_length": 0.0, "completions/min_terminated_length": 167.0, "epoch": 0.14506666666666668, "grad_norm": 0.007509256713092327, "learning_rate": 1.777777777777778e-06, "loss": 0.0082, "num_tokens": 30219291.0, "reward": 1.5949571132659912, "reward_std": 0.3979659080505371, "rewards/accuracy_reward_step": 0.46484375, "rewards/final_brier_reward_step": 0.684899628162384, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9136791229248047, "step": 136 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6786153846153846, "calib/avg_num_step_conf": 4.296875, "calib/ece": 0.21956862745098038, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0196078431372549, "calib/gap": 0.07330769230769252, "calib/mean_conf": 0.7293725490196079, "calib/mu_c": 0.7653076923076924, "calib/mu_w": 0.6919999999999998, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.21956862745098038, "calib/std_conf": 0.1153426201833364, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1272.0, "completions/max_terminated_length": 1272.0, "completions/mean_length": 454.46484375, "completions/mean_terminated_length": 456.2471008300781, "completions/min_length": 0.0, "completions/min_terminated_length": 209.0, "epoch": 0.14613333333333334, "grad_norm": 0.007360511925071478, "learning_rate": 1.75e-06, "loss": -0.0025, "num_tokens": 30442618.0, "reward": 1.6539958715438843, "reward_std": 0.3815937042236328, "rewards/accuracy_reward_step": 0.5078125, "rewards/final_brier_reward_step": 0.7103238105773926, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.8978472948074341, "step": 137 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6042407660738713, "calib/avg_num_step_conf": 4.32421875, "calib/ece": 0.08468749999999999, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0234375, "calib/gap": 0.042098495212038345, "calib/mean_conf": 0.7213281250000001, "calib/mu_c": 0.7354705882352941, "calib/mu_w": 0.6933720930232558, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.07097656249999998, "calib/std_conf": 0.12055452017234514, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1149.0, "completions/max_terminated_length": 1149.0, "completions/mean_length": 463.515625, "completions/mean_terminated_length": 465.3333740234375, "completions/min_length": 0.0, "completions/min_terminated_length": 157.0, "epoch": 0.1472, "grad_norm": 0.007477740757167339, "learning_rate": 1.7222222222222224e-06, "loss": 0.0101, "num_tokens": 30665614.0, "reward": 1.9196381568908691, "reward_std": 0.473527193069458, "rewards/accuracy_reward_step": 0.6640625, "rewards/final_brier_reward_step": 0.7778867483139038, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.9162907004356384, "step": 138 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5980053942168978, "calib/avg_num_step_conf": 4.10546875, "calib/ece": 0.14054687499999996, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.01953125, "calib/gap": 0.04040895690898805, "calib/mean_conf": 0.7196874999999999, "calib/mu_c": 0.7365771812080536, "calib/mu_w": 0.6961682242990656, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.13910156249999994, "calib/std_conf": 0.11381136737492437, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1039.0, "completions/max_terminated_length": 1039.0, "completions/mean_length": 430.80859375, "completions/mean_terminated_length": 432.4980773925781, "completions/min_length": 0.0, "completions/min_terminated_length": 143.0, "epoch": 0.14826666666666666, "grad_norm": 0.007508372887969017, "learning_rate": 1.6944444444444446e-06, "loss": -0.0199, "num_tokens": 30878997.0, "reward": 1.7896010875701904, "reward_std": 0.43053174018859863, "rewards/accuracy_reward_step": 0.58203125, "rewards/final_brier_reward_step": 0.7444875240325928, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.9217291474342346, "step": 139 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6574622188174664, "calib/avg_num_step_conf": 3.95703125, "calib/ece": 0.06691406249999986, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.015625, "calib/gap": 0.06244097778396818, "calib/mean_conf": 0.7247265625, "calib/mu_c": 0.7449710982658959, "calib/mu_w": 0.6825301204819277, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.05792968749999987, "calib/std_conf": 0.12489653310614186, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1250.0, "completions/max_terminated_length": 1250.0, "completions/mean_length": 458.50390625, "completions/mean_terminated_length": 460.302001953125, "completions/min_length": 0.0, "completions/min_terminated_length": 162.0, "epoch": 0.14933333333333335, "grad_norm": 0.007245223503559828, "learning_rate": 1.6666666666666667e-06, "loss": 0.016, "num_tokens": 31101390.0, "reward": 1.9366732835769653, "reward_std": 0.3448790907859802, "rewards/accuracy_reward_step": 0.67578125, "rewards/final_brier_reward_step": 0.7867851257324219, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9130330085754395, "step": 140 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.7461554164398476, "calib/avg_num_step_conf": 4.16015625, "calib/ece": 0.07121568627450975, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.050980392156862744, "calib/gap": 0.12020073489384864, "calib/mean_conf": 0.7213333333333333, "calib/mu_c": 0.762814371257485, "calib/mu_w": 0.6426136363636363, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.06882352941176464, "calib/std_conf": 0.1383763930148161, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1049.0, "completions/max_terminated_length": 1049.0, "completions/mean_length": 484.69140625, "completions/mean_terminated_length": 486.5921936035156, "completions/min_length": 0.0, "completions/min_terminated_length": 209.0, "epoch": 0.1504, "grad_norm": 0.007165694609284401, "learning_rate": 1.638888888888889e-06, "loss": 0.0072, "num_tokens": 31332567.0, "reward": 1.901097297668457, "reward_std": 0.362898051738739, "rewards/accuracy_reward_step": 0.65234375, "rewards/final_brier_reward_step": 0.7994171977043152, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.906534731388092, "step": 141 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6126891590176136, "calib/avg_num_step_conf": 4.09765625, "calib/ece": 0.19619607843137254, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.047058823529411764, "calib/gap": 0.052981890349789174, "calib/mean_conf": 0.741294117647059, "calib/mu_c": 0.7653956834532374, "calib/mu_w": 0.7124137931034482, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.19619607843137254, "calib/std_conf": 0.11975912002074422, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1221.0, "completions/max_terminated_length": 1221.0, "completions/mean_length": 465.8828125, "completions/mean_terminated_length": 467.7098388671875, "completions/min_length": 0.0, "completions/min_terminated_length": 166.0, "epoch": 0.15146666666666667, "grad_norm": 0.007731846533715725, "learning_rate": 1.6111111111111113e-06, "loss": 0.0167, "num_tokens": 31556993.0, "reward": 1.7208367586135864, "reward_std": 0.3604811429977417, "rewards/accuracy_reward_step": 0.54296875, "rewards/final_brier_reward_step": 0.7226402759552002, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.910706639289856, "step": 142 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5276494780132869, "calib/avg_num_step_conf": 4.07421875, "calib/ece": 0.17389763779527553, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.027559055118110236, "calib/gap": 0.012524517557734871, "calib/mean_conf": 0.7353149606299212, "calib/mu_c": 0.7406896551724137, "calib/mu_w": 0.7281651376146788, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.97265625, "calib/pce": 0.16917322834645662, "calib/std_conf": 0.11812719694494285, "calib/step_conf_rate": 0.97265625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2122.0, "completions/max_terminated_length": 2122.0, "completions/mean_length": 498.69921875, "completions/mean_terminated_length": 498.69921875, "completions/min_length": 203.0, "completions/min_terminated_length": 203.0, "epoch": 0.15253333333333333, "grad_norm": 0.007055537775158882, "learning_rate": 1.5833333333333333e-06, "loss": 0.0326, "num_tokens": 31791996.0, "reward": 1.7306500673294067, "reward_std": 0.4198153614997864, "rewards/accuracy_reward_step": 0.56640625, "rewards/final_brier_reward_step": 0.7007426023483276, "rewards/format_reward_step": 0.96875, "rewards/stepwise_brier_reward": 0.8859198093414307, "step": 143 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6412354620795321, "calib/avg_num_step_conf": 4.11328125, "calib/ece": 0.06804687499999994, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.03125, "calib/gap": 0.06610348910091224, "calib/mean_conf": 0.743828125, "calib/mu_c": 0.7652601156069363, "calib/mu_w": 0.6991566265060241, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.06804687499999994, "calib/std_conf": 0.12284467513483999, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1096.0, "completions/max_terminated_length": 1096.0, "completions/mean_length": 446.578125, "completions/mean_terminated_length": 448.3294372558594, "completions/min_length": 0.0, "completions/min_terminated_length": 149.0, "epoch": 0.1536, "grad_norm": 0.008307057432830334, "learning_rate": 1.5555555555555558e-06, "loss": 0.0113, "num_tokens": 32010448.0, "reward": 1.9410185813903809, "reward_std": 0.3637969493865967, "rewards/accuracy_reward_step": 0.67578125, "rewards/final_brier_reward_step": 0.7901445031166077, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.9192425012588501, "step": 144 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5532927163198248, "calib/avg_num_step_conf": 4.15625, "calib/ece": 0.1580314960629921, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.031496062992125984, "calib/gap": 0.010372398685651651, "calib/mean_conf": 0.7649606299212598, "calib/mu_c": 0.7685542168674698, "calib/mu_w": 0.7581818181818182, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.1347244094488189, "calib/std_conf": 0.12377509458913, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1199.0, "completions/max_terminated_length": 1199.0, "completions/mean_length": 447.35546875, "completions/mean_terminated_length": 449.1098327636719, "completions/min_length": 0.0, "completions/min_terminated_length": 180.0, "epoch": 0.15466666666666667, "grad_norm": 0.007844759151339531, "learning_rate": 1.527777777777778e-06, "loss": -0.0049, "num_tokens": 32227675.0, "reward": 1.8811393976211548, "reward_std": 0.4249289035797119, "rewards/accuracy_reward_step": 0.6484375, "rewards/final_brier_reward_step": 0.7446749806404114, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9048829078674316, "step": 145 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6414623494915466, "calib/avg_num_step_conf": 3.875, "calib/ece": 0.2798425196850393, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.047244094488188976, "calib/gap": 0.0767708528292469, "calib/mean_conf": 0.7404724409448818, "calib/mu_c": 0.7818803418803417, "calib/mu_w": 0.7051094890510948, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.2798425196850393, "calib/std_conf": 0.14709715182539784, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1064.0, "completions/max_terminated_length": 1064.0, "completions/mean_length": 485.48046875, "completions/mean_terminated_length": 487.38433837890625, "completions/min_length": 0.0, "completions/min_terminated_length": 181.0, "epoch": 0.15573333333333333, "grad_norm": 0.008200140669941902, "learning_rate": 1.5e-06, "loss": 0.012, "num_tokens": 32459174.0, "reward": 1.5805511474609375, "reward_std": 0.40894848108291626, "rewards/accuracy_reward_step": 0.45703125, "rewards/final_brier_reward_step": 0.6843593120574951, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9112821817398071, "step": 146 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6325720265944348, "calib/avg_num_step_conf": 4.09765625, "calib/ece": 0.2608627450980392, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.050980392156862744, "calib/gap": 0.06026409751292794, "calib/mean_conf": 0.7745882352941176, "calib/mu_c": 0.8038931297709923, "calib/mu_w": 0.7436290322580643, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.2608627450980392, "calib/std_conf": 0.11827405471384854, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2334.0, "completions/max_terminated_length": 2334.0, "completions/mean_length": 467.71875, "completions/mean_terminated_length": 467.71875, "completions/min_length": 175.0, "completions/min_terminated_length": 175.0, "epoch": 0.1568, "grad_norm": 0.008043180219829082, "learning_rate": 1.4722222222222225e-06, "loss": 0.034, "num_tokens": 32682590.0, "reward": 1.6648333072662354, "reward_std": 0.3625674545764923, "rewards/accuracy_reward_step": 0.51171875, "rewards/final_brier_reward_step": 0.6928074359893799, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9118380546569824, "step": 147 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6541204694287751, "calib/avg_num_step_conf": 4.1015625, "calib/ece": 0.15777343749999992, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.03515625, "calib/gap": 0.07320171172923517, "calib/mean_conf": 0.7788671875, "calib/mu_c": 0.8066037735849053, "calib/mu_w": 0.7334020618556701, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.15777343749999992, "calib/std_conf": 0.12352206730313349, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1144.0, "completions/max_terminated_length": 1144.0, "completions/mean_length": 451.91015625, "completions/mean_terminated_length": 453.682373046875, "completions/min_length": 0.0, "completions/min_terminated_length": 151.0, "epoch": 0.15786666666666666, "grad_norm": 0.008227191865444183, "learning_rate": 1.4444444444444445e-06, "loss": -0.0054, "num_tokens": 32903391.0, "reward": 1.8450043201446533, "reward_std": 0.4270305633544922, "rewards/accuracy_reward_step": 0.62109375, "rewards/final_brier_reward_step": 0.755149245262146, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9061184525489807, "step": 148 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.507754846779237, "calib/avg_num_step_conf": 4.0234375, "calib/ece": 0.3051778656126481, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.08300395256916997, "calib/gap": 0.011647904940587761, "calib/mean_conf": 0.7860474308300396, "calib/mu_c": 0.7920325203252031, "calib/mu_w": 0.7803846153846153, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3025296442687746, "calib/std_conf": 0.13556276248458035, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2870.0, "completions/max_terminated_length": 2870.0, "completions/mean_length": 502.640625, "completions/mean_terminated_length": 504.6117858886719, "completions/min_length": 0.0, "completions/min_terminated_length": 193.0, "epoch": 0.15893333333333334, "grad_norm": 0.007383955176919699, "learning_rate": 1.4166666666666667e-06, "loss": 0.059, "num_tokens": 33136523.0, "reward": 1.6020516157150269, "reward_std": 0.48702579736709595, "rewards/accuracy_reward_step": 0.48046875, "rewards/final_brier_reward_step": 0.6401144862174988, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.9087169170379639, "step": 149 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6135663235197693, "calib/avg_num_step_conf": 4.05078125, "calib/ece": 0.20349206349206345, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.07936507936507936, "calib/gap": 0.03701790046554343, "calib/mean_conf": 0.8015873015873016, "calib/mu_c": 0.8164238410596026, "calib/mu_w": 0.7794059405940592, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.20293650793650791, "calib/std_conf": 0.11184181501022798, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2815.0, "completions/max_terminated_length": 2815.0, "completions/mean_length": 460.48046875, "completions/mean_terminated_length": 462.28631591796875, "completions/min_length": 0.0, "completions/min_terminated_length": 150.0, "epoch": 0.16, "grad_norm": 0.007063120137900114, "learning_rate": 1.3888888888888892e-06, "loss": 0.0085, "num_tokens": 33359366.0, "reward": 1.7778428792953491, "reward_std": 0.44000566005706787, "rewards/accuracy_reward_step": 0.58984375, "rewards/final_brier_reward_step": 0.7124598026275635, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.8989115357398987, "step": 150 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5533248081841432, "calib/avg_num_step_conf": 3.765625, "calib/ece": 0.3199203187250995, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.043824701195219126, "calib/gap": 0.03450319693094617, "calib/mean_conf": 0.7780876494023905, "calib/mu_c": 0.7967826086956522, "calib/mu_w": 0.762279411764706, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.3199203187250995, "calib/std_conf": 0.13798900111652393, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2223.0, "completions/max_terminated_length": 2223.0, "completions/mean_length": 480.609375, "completions/mean_terminated_length": 482.494140625, "completions/min_length": 0.0, "completions/min_terminated_length": 175.0, "epoch": 0.16106666666666666, "grad_norm": 0.007321125827729702, "learning_rate": 1.3611111111111112e-06, "loss": 0.0082, "num_tokens": 33589426.0, "reward": 1.5496189594268799, "reward_std": 0.44075965881347656, "rewards/accuracy_reward_step": 0.44921875, "rewards/final_brier_reward_step": 0.6348445415496826, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.9073817133903503, "step": 151 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.560031746031746, "calib/avg_num_step_conf": 3.734375, "calib/ece": 0.2831872509960158, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.07569721115537849, "calib/gap": 0.03167873015873013, "calib/mean_conf": 0.7843824701195219, "calib/mu_c": 0.80015873015873, "calib/mu_w": 0.7684799999999998, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.97265625, "calib/pce": 0.2827888446215139, "calib/std_conf": 0.1388538640055688, "calib/step_conf_rate": 0.97265625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1180.0, "completions/max_terminated_length": 1180.0, "completions/mean_length": 469.15234375, "completions/mean_terminated_length": 472.8464660644531, "completions/min_length": 0.0, "completions/min_terminated_length": 171.0, "epoch": 0.16213333333333332, "grad_norm": 0.007486861664801836, "learning_rate": 1.3333333333333334e-06, "loss": 0.002, "num_tokens": 33814921.0, "reward": 1.5980477333068848, "reward_std": 0.5343087911605835, "rewards/accuracy_reward_step": 0.4921875, "rewards/final_brier_reward_step": 0.6394152045249939, "rewards/format_reward_step": 0.95703125, "rewards/stepwise_brier_reward": 0.8855881690979004, "step": 152 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5712327893931668, "calib/avg_num_step_conf": 3.8125, "calib/ece": 0.22196850393700773, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0984251968503937, "calib/gap": 0.03161142274349815, "calib/mean_conf": 0.8046456692913385, "calib/mu_c": 0.8178378378378378, "calib/mu_w": 0.7862264150943397, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.22196850393700773, "calib/std_conf": 0.12096264325783446, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1086.0, "completions/max_terminated_length": 1086.0, "completions/mean_length": 467.07421875, "completions/mean_terminated_length": 468.9059143066406, "completions/min_length": 0.0, "completions/min_terminated_length": 220.0, "epoch": 0.1632, "grad_norm": 0.007297352887690067, "learning_rate": 1.3055555555555556e-06, "loss": 0.0178, "num_tokens": 34041812.0, "reward": 1.7645387649536133, "reward_std": 0.4808223247528076, "rewards/accuracy_reward_step": 0.578125, "rewards/final_brier_reward_step": 0.6989551186561584, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.9138876795768738, "step": 153 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5651244890375325, "calib/avg_num_step_conf": 3.62890625, "calib/ece": 0.32023529411764695, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.07058823529411765, "calib/gap": 0.0416963953920475, "calib/mean_conf": 0.7790588235294119, "calib/mu_c": 0.8016239316239315, "calib/mu_w": 0.759927536231884, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.32023529411764695, "calib/std_conf": 0.14244605607995456, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1115.0, "completions/max_terminated_length": 1115.0, "completions/mean_length": 441.66015625, "completions/mean_terminated_length": 443.3921813964844, "completions/min_length": 0.0, "completions/min_terminated_length": 187.0, "epoch": 0.16426666666666667, "grad_norm": 0.008471459150314331, "learning_rate": 1.2777777777777779e-06, "loss": -0.0134, "num_tokens": 34259317.0, "reward": 1.5774880647659302, "reward_std": 0.4608520269393921, "rewards/accuracy_reward_step": 0.45703125, "rewards/final_brier_reward_step": 0.6470234394073486, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9285538196563721, "step": 154 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5584041168082337, "calib/avg_num_step_conf": 3.90625, "calib/ece": 0.26677165354330695, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.05511811023622047, "calib/gap": 0.026614173228346027, "calib/mean_conf": 0.7585039370078739, "calib/mu_c": 0.7718110236220469, "calib/mu_w": 0.7451968503937009, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2626377952755904, "calib/std_conf": 0.1507152435229073, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 967.0, "completions/max_terminated_length": 967.0, "completions/mean_length": 443.65625, "completions/mean_terminated_length": 445.3961181640625, "completions/min_length": 0.0, "completions/min_terminated_length": 187.0, "epoch": 0.16533333333333333, "grad_norm": 0.0080947894603014, "learning_rate": 1.25e-06, "loss": 0.0004, "num_tokens": 34480109.0, "reward": 1.6442118883132935, "reward_std": 0.44987744092941284, "rewards/accuracy_reward_step": 0.5, "rewards/final_brier_reward_step": 0.6685038805007935, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9239685535430908, "step": 155 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5989190548014077, "calib/avg_num_step_conf": 4.12109375, "calib/ece": 0.31901185770750984, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.09881422924901186, "calib/gap": 0.07836915535444933, "calib/mean_conf": 0.7814624505928853, "calib/mu_c": 0.8235897435897434, "calib/mu_w": 0.745220588235294, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.31901185770750984, "calib/std_conf": 0.16042629589979623, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1479.0, "completions/max_terminated_length": 1479.0, "completions/mean_length": 470.83203125, "completions/mean_terminated_length": 472.678466796875, "completions/min_length": 0.0, "completions/min_terminated_length": 146.0, "epoch": 0.1664, "grad_norm": 0.007263018749654293, "learning_rate": 1.2222222222222223e-06, "loss": -0.0302, "num_tokens": 34705402.0, "reward": 1.5716431140899658, "reward_std": 0.4974963068962097, "rewards/accuracy_reward_step": 0.45703125, "rewards/final_brier_reward_step": 0.655100405216217, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.9127222299575806, "step": 156 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6819584646048994, "calib/avg_num_step_conf": 3.99609375, "calib/ece": 0.18440944881889754, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.13385826771653545, "calib/gap": 0.11536809618723087, "calib/mean_conf": 0.7773228346456693, "calib/mu_c": 0.8241059602649007, "calib/mu_w": 0.7087378640776698, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.18362204724409437, "calib/std_conf": 0.1615717525152909, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 960.0, "completions/max_terminated_length": 960.0, "completions/mean_length": 445.63671875, "completions/mean_terminated_length": 447.38433837890625, "completions/min_length": 0.0, "completions/min_terminated_length": 46.0, "epoch": 0.16746666666666668, "grad_norm": 0.007417924702167511, "learning_rate": 1.1944444444444446e-06, "loss": 0.007, "num_tokens": 34923213.0, "reward": 1.7946419715881348, "reward_std": 0.45173823833465576, "rewards/accuracy_reward_step": 0.58984375, "rewards/final_brier_reward_step": 0.7452230453491211, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.9177197217941284, "step": 157 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5686170212765956, "calib/avg_num_step_conf": 3.91796875, "calib/ece": 0.20771653543307084, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.2204724409448819, "calib/gap": 0.05993617021276609, "calib/mean_conf": 0.8288188976377951, "calib/mu_c": 0.851, "calib/mu_w": 0.7910638297872339, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.20330708661417318, "calib/std_conf": 0.13153014787629336, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2685.0, "completions/max_terminated_length": 2685.0, "completions/mean_length": 443.3828125, "completions/mean_terminated_length": 445.12158203125, "completions/min_length": 0.0, "completions/min_terminated_length": 191.0, "epoch": 0.16853333333333334, "grad_norm": 0.007617747876793146, "learning_rate": 1.1666666666666668e-06, "loss": 0.0454, "num_tokens": 35141959.0, "reward": 1.8422807455062866, "reward_std": 0.536354660987854, "rewards/accuracy_reward_step": 0.625, "rewards/final_brier_reward_step": 0.7310367226600647, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.9193363189697266, "step": 158 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5075898967276347, "calib/avg_num_step_conf": 3.84375, "calib/ece": 0.22454901960784301, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.10196078431372549, "calib/gap": 0.01061776782381485, "calib/mean_conf": 0.7556078431372547, "calib/mu_c": 0.760354609929078, "calib/mu_w": 0.7497368421052631, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.21360784313725475, "calib/std_conf": 0.1636313501808703, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2553.0, "completions/max_terminated_length": 2553.0, "completions/mean_length": 442.62109375, "completions/mean_terminated_length": 442.62109375, "completions/min_length": 171.0, "completions/min_terminated_length": 171.0, "epoch": 0.1696, "grad_norm": 0.007485765963792801, "learning_rate": 1.138888888888889e-06, "loss": 0.0209, "num_tokens": 35360054.0, "reward": 1.729485273361206, "reward_std": 0.40719637274742126, "rewards/accuracy_reward_step": 0.55078125, "rewards/final_brier_reward_step": 0.6875070333480835, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9335594773292542, "step": 159 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6138448969331322, "calib/avg_num_step_conf": 3.7265625, "calib/ece": 0.25837944664031615, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.22924901185770752, "calib/gap": 0.07113122171945707, "calib/mean_conf": 0.7959288537549407, "calib/mu_c": 0.8288235294117647, "calib/mu_w": 0.7576923076923077, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.25837944664031615, "calib/std_conf": 0.1656500754825719, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2996.0, "completions/max_terminated_length": 2996.0, "completions/mean_length": 452.23046875, "completions/mean_terminated_length": 454.0039367675781, "completions/min_length": 0.0, "completions/min_terminated_length": 226.0, "epoch": 0.17066666666666666, "grad_norm": 0.008462823927402496, "learning_rate": 1.111111111111111e-06, "loss": 0.0429, "num_tokens": 35580665.0, "reward": 1.6944373846054077, "reward_std": 0.4696410298347473, "rewards/accuracy_reward_step": 0.53125, "rewards/final_brier_reward_step": 0.6844590306282043, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.9292279481887817, "step": 160 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6363798898662661, "calib/avg_num_step_conf": 3.90625, "calib/ece": 0.11937500000000004, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.1875, "calib/gap": 0.08837874561968084, "calib/mean_conf": 0.76515625, "calib/mu_c": 0.7924293785310734, "calib/mu_w": 0.7040506329113926, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.09656250000000004, "calib/std_conf": 0.17892467154067238, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1355.0, "completions/max_terminated_length": 1355.0, "completions/mean_length": 437.55078125, "completions/mean_terminated_length": 439.2666931152344, "completions/min_length": 0.0, "completions/min_terminated_length": 127.0, "epoch": 0.17173333333333332, "grad_norm": 0.007638855371624231, "learning_rate": 1.0833333333333335e-06, "loss": 0.0357, "num_tokens": 35796598.0, "reward": 1.9639207124710083, "reward_std": 0.4240199625492096, "rewards/accuracy_reward_step": 0.69140625, "rewards/final_brier_reward_step": 0.7832546830177307, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9318031072616577, "step": 161 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.576569264069264, "calib/avg_num_step_conf": 3.71484375, "calib/ece": 0.17574218750000004, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.28125, "calib/gap": 0.04588203463203455, "calib/mean_conf": 0.7623828124999998, "calib/mu_c": 0.7781547619047618, "calib/mu_w": 0.7322727272727273, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.14093750000000005, "calib/std_conf": 0.20121939290135493, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1058.0, "completions/max_terminated_length": 1058.0, "completions/mean_length": 425.61328125, "completions/mean_terminated_length": 427.2823791503906, "completions/min_length": 0.0, "completions/min_terminated_length": 156.0, "epoch": 0.1728, "grad_norm": 0.008791333064436913, "learning_rate": 1.0555555555555557e-06, "loss": 0.0266, "num_tokens": 36009699.0, "reward": 1.901847243309021, "reward_std": 0.41961348056793213, "rewards/accuracy_reward_step": 0.65625, "rewards/final_brier_reward_step": 0.7394648790359497, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9382367134094238, "step": 162 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6198596404826399, "calib/avg_num_step_conf": 3.671875, "calib/ece": 0.2371764705882352, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.23921568627450981, "calib/gap": 0.08601822211278021, "calib/mean_conf": 0.7487058823529411, "calib/mu_c": 0.7905343511450382, "calib/mu_w": 0.704516129032258, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.23607843137254897, "calib/std_conf": 0.20200827920498382, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1266.0, "completions/max_terminated_length": 1266.0, "completions/mean_length": 462.83984375, "completions/mean_terminated_length": 462.83984375, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 0.17386666666666667, "grad_norm": 0.0074338410049676895, "learning_rate": 1.0277777777777777e-06, "loss": 0.0123, "num_tokens": 36233018.0, "reward": 1.6685981750488281, "reward_std": 0.4071301817893982, "rewards/accuracy_reward_step": 0.51171875, "rewards/final_brier_reward_step": 0.6907016038894653, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9290037155151367, "step": 163 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5939094189374144, "calib/avg_num_step_conf": 3.66796875, "calib/ece": 0.1917254901960785, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.23137254901960785, "calib/gap": 0.07571855169839492, "calib/mean_conf": 0.7327450980392157, "calib/mu_c": 0.7665957446808511, "calib/mu_w": 0.6908771929824562, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.18576470588235305, "calib/std_conf": 0.2040019072514894, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1710.0, "completions/max_terminated_length": 1710.0, "completions/mean_length": 476.5, "completions/mean_terminated_length": 478.36865234375, "completions/min_length": 0.0, "completions/min_terminated_length": 200.0, "epoch": 0.17493333333333333, "grad_norm": 0.0079426234588027, "learning_rate": 1.0000000000000002e-06, "loss": 0.0127, "num_tokens": 36461138.0, "reward": 1.7319530248641968, "reward_std": 0.4190503656864166, "rewards/accuracy_reward_step": 0.55078125, "rewards/final_brier_reward_step": 0.7105636596679688, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9281857013702393, "step": 164 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5508837209302326, "calib/avg_num_step_conf": 3.8359375, "calib/ece": 0.275748031496063, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.2283464566929134, "calib/gap": 0.0350337984496123, "calib/mean_conf": 0.7620472440944882, "calib/mu_c": 0.77984, "calib/mu_w": 0.7448062015503877, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.27283464566929133, "calib/std_conf": 0.18649201955726338, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2592.0, "completions/max_terminated_length": 2592.0, "completions/mean_length": 461.8046875, "completions/mean_terminated_length": 461.8046875, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 0.176, "grad_norm": 0.007690636441111565, "learning_rate": 9.722222222222224e-07, "loss": 0.0221, "num_tokens": 36684936.0, "reward": 1.6147657632827759, "reward_std": 0.4274684488773346, "rewards/accuracy_reward_step": 0.48828125, "rewards/final_brier_reward_step": 0.6454948782920837, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.9229432344436646, "step": 165 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.6298540249433107, "calib/avg_num_step_conf": 3.71484375, "calib/ece": 0.11531746031746046, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.2777777777777778, "calib/gap": 0.10089285714285712, "calib/mean_conf": 0.7366666666666666, "calib/mu_c": 0.7702976190476191, "calib/mu_w": 0.669404761904762, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.09265873015873027, "calib/std_conf": 0.20488479147505745, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1129.0, "completions/max_terminated_length": 1129.0, "completions/mean_length": 473.66796875, "completions/mean_terminated_length": 477.39764404296875, "completions/min_length": 0.0, "completions/min_terminated_length": 197.0, "epoch": 0.17706666666666668, "grad_norm": 0.007974786683917046, "learning_rate": 9.444444444444445e-07, "loss": -0.0102, "num_tokens": 36912379.0, "reward": 1.8942294120788574, "reward_std": 0.4324895143508911, "rewards/accuracy_reward_step": 0.65625, "rewards/final_brier_reward_step": 0.7597531080245972, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.9187272787094116, "step": 166 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5789037645811241, "calib/avg_num_step_conf": 3.7421875, "calib/ece": 0.17070312499999996, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.30078125, "calib/gap": 0.06559384941675517, "calib/mean_conf": 0.742890625, "calib/mu_c": 0.7664634146341465, "calib/mu_w": 0.7008695652173913, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.136484375, "calib/std_conf": 0.2149205970518167, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 993.0, "completions/max_terminated_length": 993.0, "completions/mean_length": 446.140625, "completions/mean_terminated_length": 447.8902282714844, "completions/min_length": 0.0, "completions/min_terminated_length": 180.0, "epoch": 0.17813333333333334, "grad_norm": 0.008287301287055016, "learning_rate": 9.166666666666666e-07, "loss": 0.0091, "num_tokens": 37132199.0, "reward": 1.8784657716751099, "reward_std": 0.394712895154953, "rewards/accuracy_reward_step": 0.640625, "rewards/final_brier_reward_step": 0.7408288717269897, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9370966553688049, "step": 167 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.599137104506232, "calib/avg_num_step_conf": 3.49609375, "calib/ece": 0.1996456692913386, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.32677165354330706, "calib/gap": 0.06027165228507514, "calib/mean_conf": 0.7601181102362204, "calib/mu_c": 0.7850335570469799, "calib/mu_w": 0.7247619047619047, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.18657480314960628, "calib/std_conf": 0.2056820462758194, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2449.0, "completions/max_terminated_length": 2449.0, "completions/mean_length": 473.3046875, "completions/mean_terminated_length": 473.3046875, "completions/min_length": 182.0, "completions/min_terminated_length": 182.0, "epoch": 0.1792, "grad_norm": 0.007830573245882988, "learning_rate": 8.88888888888889e-07, "loss": 0.0542, "num_tokens": 37358037.0, "reward": 1.774470329284668, "reward_std": 0.5710139274597168, "rewards/accuracy_reward_step": 0.58203125, "rewards/final_brier_reward_step": 0.7054336071014404, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.9315104484558105, "step": 168 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5640266699900299, "calib/avg_num_step_conf": 3.59765625, "calib/ece": 0.2568503937007873, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.33858267716535434, "calib/gap": 0.06924476570289129, "calib/mean_conf": 0.7494488188976378, "calib/mu_c": 0.7816176470588235, "calib/mu_w": 0.7123728813559322, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.23543307086614168, "calib/std_conf": 0.23326274289524773, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1054.0, "completions/max_terminated_length": 1054.0, "completions/mean_length": 449.08984375, "completions/mean_terminated_length": 452.6259765625, "completions/min_length": 0.0, "completions/min_terminated_length": 175.0, "epoch": 0.18026666666666666, "grad_norm": 0.008146989159286022, "learning_rate": 8.611111111111112e-07, "loss": -0.0008, "num_tokens": 37577188.0, "reward": 1.6961153745651245, "reward_std": 0.40185391902923584, "rewards/accuracy_reward_step": 0.53125, "rewards/final_brier_reward_step": 0.6801343560218811, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9324523210525513, "step": 169 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.7114549045424621, "calib/avg_num_step_conf": 3.69921875, "calib/ece": 0.17636363636363628, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.37549407114624506, "calib/gap": 0.13707899934167211, "calib/mean_conf": 0.7824505928853754, "calib/mu_c": 0.8355483870967741, "calib/mu_w": 0.698469387755102, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.1730830039525691, "calib/std_conf": 0.20751761325704726, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1740.0, "completions/max_terminated_length": 1740.0, "completions/mean_length": 450.67578125, "completions/mean_terminated_length": 452.44317626953125, "completions/min_length": 0.0, "completions/min_terminated_length": 197.0, "epoch": 0.18133333333333335, "grad_norm": 0.007818641141057014, "learning_rate": 8.333333333333333e-07, "loss": 0.0039, "num_tokens": 37796713.0, "reward": 1.8228336572647095, "reward_std": 0.3818396329879761, "rewards/accuracy_reward_step": 0.60546875, "rewards/final_brier_reward_step": 0.7469961047172546, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.9349637031555176, "step": 170 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5743734335839599, "calib/avg_num_step_conf": 3.66796875, "calib/ece": 0.2789370078740158, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.1968503937007874, "calib/gap": 0.055676691729323324, "calib/mean_conf": 0.6964173228346456, "calib/mu_c": 0.7271052631578947, "calib/mu_w": 0.6714285714285714, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2632677165354331, "calib/std_conf": 0.23475099694370016, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2675.0, "completions/max_terminated_length": 2675.0, "completions/mean_length": 430.09765625, "completions/mean_terminated_length": 431.7843322753906, "completions/min_length": 0.0, "completions/min_terminated_length": 225.0, "epoch": 0.1824, "grad_norm": 0.00880393385887146, "learning_rate": 8.055555555555557e-07, "loss": 0.0378, "num_tokens": 38013714.0, "reward": 1.5643079280853271, "reward_std": 0.48412132263183594, "rewards/accuracy_reward_step": 0.4453125, "rewards/final_brier_reward_step": 0.6585675477981567, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9424140453338623, "step": 171 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5357384594558007, "calib/avg_num_step_conf": 3.58984375, "calib/ece": 0.21105882352941183, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.3411764705882353, "calib/gap": 0.026694192500338554, "calib/mean_conf": 0.7216470588235294, "calib/mu_c": 0.7309638554216868, "calib/mu_w": 0.7042696629213483, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.14086274509803925, "calib/std_conf": 0.2421286988464502, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2075.0, "completions/max_terminated_length": 2075.0, "completions/mean_length": 431.54296875, "completions/mean_terminated_length": 431.54296875, "completions/min_length": 196.0, "completions/min_terminated_length": 196.0, "epoch": 0.18346666666666667, "grad_norm": 0.008577347733080387, "learning_rate": 7.777777777777779e-07, "loss": 0.0029, "num_tokens": 38227541.0, "reward": 1.8829433917999268, "reward_std": 0.4061717987060547, "rewards/accuracy_reward_step": 0.6484375, "rewards/final_brier_reward_step": 0.7155578136444092, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9412156343460083, "step": 172 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5186795186795187, "calib/avg_num_step_conf": 3.8984375, "calib/ece": 0.28606299212598435, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.421259842519685, "calib/gap": 0.017394317394317405, "calib/mean_conf": 0.7839370078740158, "calib/mu_c": 0.7915384615384616, "calib/mu_w": 0.7741441441441442, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.2535039370078741, "calib/std_conf": 0.22368480875952962, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1097.0, "completions/max_terminated_length": 1097.0, "completions/mean_length": 447.54296875, "completions/mean_terminated_length": 449.2980651855469, "completions/min_length": 0.0, "completions/min_terminated_length": 135.0, "epoch": 0.18453333333333333, "grad_norm": 0.00868956372141838, "learning_rate": 7.5e-07, "loss": 0.0118, "num_tokens": 38445272.0, "reward": 1.731661319732666, "reward_std": 0.4513019323348999, "rewards/accuracy_reward_step": 0.55859375, "rewards/final_brier_reward_step": 0.6584905982017517, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9322172403335571, "step": 173 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5523114355231143, "calib/avg_num_step_conf": 3.859375, "calib/ece": 0.32207171314741045, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.3426294820717131, "calib/gap": 0.030355999487770458, "calib/mean_conf": 0.7372908366533865, "calib/mu_c": 0.753859649122807, "calib/mu_w": 0.7235036496350365, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.30258964143426303, "calib/std_conf": 0.23218139090497097, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2521.0, "completions/max_terminated_length": 2521.0, "completions/mean_length": 497.0078125, "completions/mean_terminated_length": 497.0078125, "completions/min_length": 215.0, "completions/min_terminated_length": 215.0, "epoch": 0.1856, "grad_norm": 0.008361944928765297, "learning_rate": 7.222222222222222e-07, "loss": 0.0872, "num_tokens": 38676738.0, "reward": 1.5412625074386597, "reward_std": 0.43886956572532654, "rewards/accuracy_reward_step": 0.4453125, "rewards/final_brier_reward_step": 0.6203457117080688, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.9197043180465698, "step": 174 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5983745983745984, "calib/avg_num_step_conf": 3.56640625, "calib/ece": 0.25137795275590563, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.2559055118110236, "calib/gap": 0.08235179235179191, "calib/mean_conf": 0.6857086614173229, "calib/mu_c": 0.7320720720720717, "calib/mu_w": 0.6497202797202798, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.2500393700787403, "calib/std_conf": 0.24757582502655276, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1409.0, "completions/max_terminated_length": 1409.0, "completions/mean_length": 449.890625, "completions/mean_terminated_length": 451.6549377441406, "completions/min_length": 0.0, "completions/min_terminated_length": 164.0, "epoch": 0.18666666666666668, "grad_norm": 0.008198156021535397, "learning_rate": 6.944444444444446e-07, "loss": 0.0131, "num_tokens": 38897734.0, "reward": 1.543121337890625, "reward_std": 0.4899318814277649, "rewards/accuracy_reward_step": 0.43359375, "rewards/final_brier_reward_step": 0.6622323989868164, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.932127833366394, "step": 175 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5620226656812022, "calib/avg_num_step_conf": 3.72265625, "calib/ece": 0.2756470588235293, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.35294117647058826, "calib/gap": 0.05977087952697713, "calib/mean_conf": 0.7579215686274509, "calib/mu_c": 0.7888617886178861, "calib/mu_w": 0.729090909090909, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2756078431372548, "calib/std_conf": 0.22568239379297542, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1164.0, "completions/max_terminated_length": 1164.0, "completions/mean_length": 443.9765625, "completions/mean_terminated_length": 445.7176818847656, "completions/min_length": 0.0, "completions/min_terminated_length": 167.0, "epoch": 0.18773333333333334, "grad_norm": 0.008800878189504147, "learning_rate": 6.666666666666667e-07, "loss": -0.0109, "num_tokens": 39115456.0, "reward": 1.615879774093628, "reward_std": 0.3879082202911377, "rewards/accuracy_reward_step": 0.48046875, "rewards/final_brier_reward_step": 0.6507371068000793, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9377819299697876, "step": 176 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6370708661417323, "calib/avg_num_step_conf": 3.5546875, "calib/ece": 0.21146825396825397, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.29365079365079366, "calib/gap": 0.13484031496062976, "calib/mean_conf": 0.6894047619047619, "calib/mu_c": 0.7573599999999998, "calib/mu_w": 0.62251968503937, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.20242063492063492, "calib/std_conf": 0.25798679302272365, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3009.0, "completions/max_terminated_length": 3009.0, "completions/mean_length": 453.82421875, "completions/mean_terminated_length": 455.60394287109375, "completions/min_length": 0.0, "completions/min_terminated_length": 214.0, "epoch": 0.1888, "grad_norm": 0.00836542621254921, "learning_rate": 6.388888888888889e-07, "loss": 0.0377, "num_tokens": 39335467.0, "reward": 1.6355714797973633, "reward_std": 0.3930942416191101, "rewards/accuracy_reward_step": 0.4921875, "rewards/final_brier_reward_step": 0.6990835666656494, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.9291399717330933, "step": 177 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5905233380480905, "calib/avg_num_step_conf": 3.5546875, "calib/ece": 0.1696078431372549, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.3137254901960784, "calib/gap": 0.0794419441944193, "calib/mean_conf": 0.7003529411764705, "calib/mu_c": 0.7318181818181817, "calib/mu_w": 0.6523762376237624, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.13301960784313727, "calib/std_conf": 0.24759920361060206, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 914.0, "completions/max_terminated_length": 914.0, "completions/mean_length": 437.08984375, "completions/mean_terminated_length": 438.803955078125, "completions/min_length": 0.0, "completions/min_terminated_length": 194.0, "epoch": 0.18986666666666666, "grad_norm": 0.008996784687042236, "learning_rate": 6.111111111111112e-07, "loss": 0.0402, "num_tokens": 39553434.0, "reward": 1.8174397945404053, "reward_std": 0.5554205179214478, "rewards/accuracy_reward_step": 0.6015625, "rewards/final_brier_reward_step": 0.725355863571167, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9428409337997437, "step": 178 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5589751552795033, "calib/avg_num_step_conf": 3.5078125, "calib/ece": 0.20407843137254905, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.25098039215686274, "calib/gap": 0.05414285714285749, "calib/mean_conf": 0.6977254901960785, "calib/mu_c": 0.7221428571428573, "calib/mu_w": 0.6679999999999998, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.17639215686274512, "calib/std_conf": 0.2417929821417383, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2778.0, "completions/max_terminated_length": 2778.0, "completions/mean_length": 447.703125, "completions/mean_terminated_length": 447.703125, "completions/min_length": 206.0, "completions/min_terminated_length": 206.0, "epoch": 0.19093333333333334, "grad_norm": 0.008592971600592136, "learning_rate": 5.833333333333334e-07, "loss": 0.0348, "num_tokens": 39774310.0, "reward": 1.7282838821411133, "reward_std": 0.45130395889282227, "rewards/accuracy_reward_step": 0.546875, "rewards/final_brier_reward_step": 0.6959078311920166, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.943790078163147, "step": 179 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5888141025641026, "calib/avg_num_step_conf": 3.7265625, "calib/ece": 0.23724409448818884, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.452755905511811, "calib/gap": 0.06779230769230771, "calib/mean_conf": 0.7698425196850395, "calib/mu_c": 0.7976000000000001, "calib/mu_w": 0.7298076923076924, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.20826771653543297, "calib/std_conf": 0.22764450892574783, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1691.0, "completions/max_terminated_length": 1691.0, "completions/mean_length": 479.984375, "completions/mean_terminated_length": 483.7637634277344, "completions/min_length": 0.0, "completions/min_terminated_length": 226.0, "epoch": 0.192, "grad_norm": 0.007479749154299498, "learning_rate": 5.555555555555555e-07, "loss": -0.0021, "num_tokens": 40001042.0, "reward": 1.7830191850662231, "reward_std": 0.4244670569896698, "rewards/accuracy_reward_step": 0.5859375, "rewards/final_brier_reward_step": 0.701492965221405, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9305838346481323, "step": 180 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5667370906605357, "calib/avg_num_step_conf": 3.58203125, "calib/ece": 0.32913385826771646, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.421259842519685, "calib/gap": 0.06023488473249239, "calib/mean_conf": 0.7860629921259844, "calib/mu_c": 0.8176033057851239, "calib/mu_w": 0.7573684210526315, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.31940944881889755, "calib/std_conf": 0.22379214723901802, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2660.0, "completions/max_terminated_length": 2660.0, "completions/mean_length": 427.046875, "completions/mean_terminated_length": 427.046875, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 0.19306666666666666, "grad_norm": 0.00919902604073286, "learning_rate": 5.277777777777779e-07, "loss": 0.0385, "num_tokens": 40216630.0, "reward": 1.590777039527893, "reward_std": 0.5281400084495544, "rewards/accuracy_reward_step": 0.47265625, "rewards/final_brier_reward_step": 0.6234515905380249, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.9349694848060608, "step": 181 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6672238652436671, "calib/avg_num_step_conf": 3.5, "calib/ece": 0.2038039215686274, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.43137254901960786, "calib/gap": 0.1320027002700268, "calib/mean_conf": 0.7763529411764705, "calib/mu_c": 0.8286363636363636, "calib/mu_w": 0.6966336633663368, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.18811764705882347, "calib/std_conf": 0.22953925065092615, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2196.0, "completions/max_terminated_length": 2196.0, "completions/mean_length": 438.4140625, "completions/mean_terminated_length": 438.4140625, "completions/min_length": 188.0, "completions/min_terminated_length": 188.0, "epoch": 0.19413333333333332, "grad_norm": 0.008267982862889767, "learning_rate": 5.000000000000001e-07, "loss": 0.0387, "num_tokens": 40435024.0, "reward": 1.8188896179199219, "reward_std": 0.39455264806747437, "rewards/accuracy_reward_step": 0.6015625, "rewards/final_brier_reward_step": 0.7382515668869019, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.94355708360672, "step": 182 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6263074984247007, "calib/avg_num_step_conf": 3.4921875, "calib/ece": 0.23162055335968376, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.37549407114624506, "calib/gap": 0.11056521739130432, "calib/mean_conf": 0.7377865612648221, "calib/mu_c": 0.7880434782608695, "calib/mu_w": 0.6774782608695652, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.21197628458498022, "calib/std_conf": 0.2486545651779337, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1546.0, "completions/max_terminated_length": 1546.0, "completions/mean_length": 482.29296875, "completions/mean_terminated_length": 486.0905456542969, "completions/min_length": 0.0, "completions/min_terminated_length": 218.0, "epoch": 0.1952, "grad_norm": 0.007671494036912918, "learning_rate": 4.7222222222222226e-07, "loss": -0.0029, "num_tokens": 40665171.0, "reward": 1.7111189365386963, "reward_std": 0.5463021397590637, "rewards/accuracy_reward_step": 0.5390625, "rewards/final_brier_reward_step": 0.6997734308242798, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.933765172958374, "step": 183 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5814993564993564, "calib/avg_num_step_conf": 3.4453125, "calib/ece": 0.24713147410358563, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.41434262948207173, "calib/gap": 0.08322200772200794, "calib/mean_conf": 0.7615537848605577, "calib/mu_c": 0.7983571428571429, "calib/mu_w": 0.7151351351351349, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.22545816733067728, "calib/std_conf": 0.24086409869355724, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2620.0, "completions/max_terminated_length": 2620.0, "completions/mean_length": 471.40625, "completions/mean_terminated_length": 476.9960632324219, "completions/min_length": 0.0, "completions/min_terminated_length": 170.0, "epoch": 0.19626666666666667, "grad_norm": 0.007687829434871674, "learning_rate": 4.444444444444445e-07, "loss": 0.0316, "num_tokens": 40891131.0, "reward": 1.7092678546905518, "reward_std": 0.580082893371582, "rewards/accuracy_reward_step": 0.546875, "rewards/final_brier_reward_step": 0.6801937818527222, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.9225028157234192, "step": 184 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5625609994144056, "calib/avg_num_step_conf": 3.38671875, "calib/ece": 0.24247999999999992, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.364, "calib/gap": 0.05972151734010023, "calib/mean_conf": 0.75616, "calib/mu_c": 0.7821985815602838, "calib/mu_w": 0.7224770642201835, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.21731999999999993, "calib/std_conf": 0.23194924962154972, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2094.0, "completions/max_terminated_length": 2094.0, "completions/mean_length": 447.8515625, "completions/mean_terminated_length": 456.7729187011719, "completions/min_length": 0.0, "completions/min_terminated_length": 203.0, "epoch": 0.19733333333333333, "grad_norm": 0.008878587745130062, "learning_rate": 4.1666666666666667e-07, "loss": 0.0212, "num_tokens": 41112701.0, "reward": 1.704228162765503, "reward_std": 0.4695289134979248, "rewards/accuracy_reward_step": 0.55078125, "rewards/final_brier_reward_step": 0.6678878664970398, "rewards/format_reward_step": 0.96484375, "rewards/stepwise_brier_reward": 0.9146498441696167, "step": 185 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6606127996046454, "calib/avg_num_step_conf": 3.546875, "calib/ece": 0.2214453125, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.33984375, "calib/gap": 0.12739436619718314, "calib/mean_conf": 0.7456640625, "calib/mu_c": 0.8023943661971832, "calib/mu_w": 0.675, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2062109375, "calib/std_conf": 0.2433866451163582, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 875.0, "completions/max_terminated_length": 875.0, "completions/mean_length": 447.9453125, "completions/mean_terminated_length": 451.4724426269531, "completions/min_length": 0.0, "completions/min_terminated_length": 160.0, "epoch": 0.1984, "grad_norm": 0.008053626865148544, "learning_rate": 3.8888888888888895e-07, "loss": 0.0144, "num_tokens": 41332415.0, "reward": 1.747734785079956, "reward_std": 0.39273902773857117, "rewards/accuracy_reward_step": 0.5546875, "rewards/final_brier_reward_step": 0.7202167510986328, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.9425970315933228, "step": 186 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4550224887556222, "calib/avg_num_step_conf": 4.1953125, "calib/ece": 0.31059055118110224, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.4251968503937008, "calib/gap": -0.0163343328335831, "calib/mean_conf": 0.7876771653543306, "calib/mu_c": 0.7802173913043479, "calib/mu_w": 0.796551724137931, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2774803149606298, "calib/std_conf": 0.21472862866131273, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2410.0, "completions/max_terminated_length": 2410.0, "completions/mean_length": 499.9609375, "completions/mean_terminated_length": 499.9609375, "completions/min_length": 181.0, "completions/min_terminated_length": 181.0, "epoch": 0.19946666666666665, "grad_norm": 0.00757110770791769, "learning_rate": 3.611111111111111e-07, "loss": 0.0674, "num_tokens": 41561949.0, "reward": 1.6947870254516602, "reward_std": 0.4970128536224365, "rewards/accuracy_reward_step": 0.5390625, "rewards/final_brier_reward_step": 0.6329605579376221, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9274370670318604, "step": 187 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.48736961166268683, "calib/avg_num_step_conf": 3.578125, "calib/ece": 0.31290196078431354, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.45098039215686275, "calib/gap": -0.00617255246952364, "calib/mean_conf": 0.8032549019607843, "calib/mu_c": 0.8006164383561644, "calib/mu_w": 0.806788990825688, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.2718039215686272, "calib/std_conf": 0.21318866201847056, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1146.0, "completions/max_terminated_length": 1146.0, "completions/mean_length": 461.875, "completions/mean_terminated_length": 463.6863098144531, "completions/min_length": 0.0, "completions/min_terminated_length": 162.0, "epoch": 0.20053333333333334, "grad_norm": 0.007569571956992149, "learning_rate": 3.3333333333333335e-07, "loss": 0.0095, "num_tokens": 41784261.0, "reward": 1.7472176551818848, "reward_std": 0.42943885922431946, "rewards/accuracy_reward_step": 0.5703125, "rewards/final_brier_reward_step": 0.6471472978591919, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9354736804962158, "step": 188 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5716177861873227, "calib/avg_num_step_conf": 3.4140625, "calib/ece": 0.25820312499999987, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.43359375, "calib/gap": 0.0431081677704197, "calib/mean_conf": 0.7660937499999999, "calib/mu_c": 0.7837748344370863, "calib/mu_w": 0.7406666666666666, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.21722656249999991, "calib/std_conf": 0.24760803947153553, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1027.0, "completions/max_terminated_length": 1027.0, "completions/mean_length": 428.27734375, "completions/mean_terminated_length": 429.9568786621094, "completions/min_length": 0.0, "completions/min_terminated_length": 166.0, "epoch": 0.2016, "grad_norm": 0.0090395612642169, "learning_rate": 3.055555555555556e-07, "loss": 0.0144, "num_tokens": 42001668.0, "reward": 1.7883484363555908, "reward_std": 0.40974628925323486, "rewards/accuracy_reward_step": 0.58984375, "rewards/final_brier_reward_step": 0.6837632656097412, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9461929798126221, "step": 189 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6864989989989991, "calib/avg_num_step_conf": 3.265625, "calib/ece": 0.2174117647058823, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.40784313725490196, "calib/gap": 0.1463269519519521, "calib/mean_conf": 0.776235294117647, "calib/mu_c": 0.8399305555555555, "calib/mu_w": 0.6936036036036034, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.21447058823529408, "calib/std_conf": 0.21931610173403926, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 935.0, "completions/max_terminated_length": 935.0, "completions/mean_length": 469.2265625, "completions/mean_terminated_length": 471.0666809082031, "completions/min_length": 0.0, "completions/min_terminated_length": 186.0, "epoch": 0.20266666666666666, "grad_norm": 0.007644584868103266, "learning_rate": 2.7777777777777776e-07, "loss": -0.003, "num_tokens": 42227398.0, "reward": 1.7453137636184692, "reward_std": 0.4353315234184265, "rewards/accuracy_reward_step": 0.5625, "rewards/final_brier_reward_step": 0.7184046506881714, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.9269128441810608, "step": 190 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5794139374538291, "calib/avg_num_step_conf": 3.53515625, "calib/ece": 0.3589411764705883, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.48627450980392156, "calib/gap": 0.03948227037675456, "calib/mean_conf": 0.8143137254901961, "calib/mu_c": 0.8345967741935484, "calib/mu_w": 0.7951145038167938, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.34349019607843145, "calib/std_conf": 0.2008282350858299, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2628.0, "completions/max_terminated_length": 2628.0, "completions/mean_length": 443.63671875, "completions/mean_terminated_length": 443.63671875, "completions/min_length": 179.0, "completions/min_terminated_length": 179.0, "epoch": 0.20373333333333332, "grad_norm": 0.008346221409738064, "learning_rate": 2.5000000000000004e-07, "loss": 0.0273, "num_tokens": 42445137.0, "reward": 1.611642837524414, "reward_std": 0.420346736907959, "rewards/accuracy_reward_step": 0.484375, "rewards/final_brier_reward_step": 0.6173906326293945, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9385555982589722, "step": 191 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6621045477014336, "calib/avg_num_step_conf": 3.42578125, "calib/ece": 0.23835294117647055, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.42745098039215684, "calib/gap": 0.1552205882352944, "calib/mean_conf": 0.7657254901960784, "calib/mu_c": 0.8381617647058824, "calib/mu_w": 0.682941176470588, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2353725490196078, "calib/std_conf": 0.24121458012312477, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2361.0, "completions/max_terminated_length": 2361.0, "completions/mean_length": 482.12109375, "completions/mean_terminated_length": 482.12109375, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 0.2048, "grad_norm": 0.00783407874405384, "learning_rate": 2.2222222222222224e-07, "loss": 0.026, "num_tokens": 42673536.0, "reward": 1.7091728448867798, "reward_std": 0.4516070485115051, "rewards/accuracy_reward_step": 0.53125, "rewards/final_brier_reward_step": 0.7133882641792297, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9436155557632446, "step": 192 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5753201181974883, "calib/avg_num_step_conf": 3.421875, "calib/ece": 0.28921568627450966, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.4196078431372549, "calib/gap": 0.052146638758926445, "calib/mean_conf": 0.7609019607843136, "calib/mu_c": 0.7862595419847328, "calib/mu_w": 0.7341129032258064, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.26819607843137244, "calib/std_conf": 0.24440959312564142, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1641.0, "completions/max_terminated_length": 1641.0, "completions/mean_length": 477.22265625, "completions/mean_terminated_length": 479.0941467285156, "completions/min_length": 0.0, "completions/min_terminated_length": 198.0, "epoch": 0.20586666666666667, "grad_norm": 0.008240696042776108, "learning_rate": 1.9444444444444447e-07, "loss": 0.039, "num_tokens": 42901417.0, "reward": 1.6538842916488647, "reward_std": 0.5406385660171509, "rewards/accuracy_reward_step": 0.51171875, "rewards/final_brier_reward_step": 0.6447539329528809, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.931720495223999, "step": 193 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.64476976976977, "calib/avg_num_step_conf": 3.3515625, "calib/ece": 0.22789062499999996, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.49609375, "calib/gap": 0.14399399399399393, "calib/mean_conf": 0.7854687499999998, "calib/mu_c": 0.8462162162162162, "calib/mu_w": 0.7022222222222223, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.21761718749999995, "calib/std_conf": 0.24354593821995366, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 923.0, "completions/max_terminated_length": 923.0, "completions/mean_length": 417.890625, "completions/mean_terminated_length": 419.5294494628906, "completions/min_length": 0.0, "completions/min_terminated_length": 187.0, "epoch": 0.20693333333333333, "grad_norm": 0.008800129406154156, "learning_rate": 1.6666666666666668e-07, "loss": 0.0017, "num_tokens": 43114341.0, "reward": 1.7753586769104004, "reward_std": 0.5017086863517761, "rewards/accuracy_reward_step": 0.578125, "rewards/final_brier_reward_step": 0.7127922177314758, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.9433302283287048, "step": 194 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6631269618646898, "calib/avg_num_step_conf": 3.359375, "calib/ece": 0.18673228346456677, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.452755905511811, "calib/gap": 0.13623388766446276, "calib/mean_conf": 0.7712992125984252, "calib/mu_c": 0.8211801242236025, "calib/mu_w": 0.6849462365591398, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.16208661417322823, "calib/std_conf": 0.2503483499018769, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2161.0, "completions/max_terminated_length": 2161.0, "completions/mean_length": 442.984375, "completions/mean_terminated_length": 442.984375, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 0.208, "grad_norm": 0.00870486069470644, "learning_rate": 1.3888888888888888e-07, "loss": 0.0373, "num_tokens": 43333729.0, "reward": 1.8612749576568604, "reward_std": 0.4175664186477661, "rewards/accuracy_reward_step": 0.62890625, "rewards/final_brier_reward_step": 0.7437324523925781, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9435549378395081, "step": 195 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.557908496732026, "calib/avg_num_step_conf": 3.2265625, "calib/ece": 0.21549407114624491, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.383399209486166, "calib/gap": 0.06189019607843138, "calib/mean_conf": 0.8010276679841897, "calib/mu_c": 0.8254901960784313, "calib/mu_w": 0.7636, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.20588932806324095, "calib/std_conf": 0.19607948705152148, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1168.0, "completions/max_terminated_length": 1168.0, "completions/mean_length": 402.36328125, "completions/mean_terminated_length": 405.531494140625, "completions/min_length": 0.0, "completions/min_terminated_length": 172.0, "epoch": 0.20906666666666668, "grad_norm": 0.008162040263414383, "learning_rate": 1.1111111111111112e-07, "loss": -0.0011, "num_tokens": 43539278.0, "reward": 1.802194595336914, "reward_std": 0.2759098410606384, "rewards/accuracy_reward_step": 0.59765625, "rewards/final_brier_reward_step": 0.7052210569381714, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.941057026386261, "step": 196 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.6223738495715645, "calib/avg_num_step_conf": 3.4453125, "calib/ece": 0.33499999999999996, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.43253968253968256, "calib/gap": 0.1003122818152965, "calib/mean_conf": 0.7846825396825396, "calib/mu_c": 0.8392173913043476, "calib/mu_w": 0.7389051094890511, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.33166666666666667, "calib/std_conf": 0.21787460536600237, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2516.0, "completions/max_terminated_length": 2516.0, "completions/mean_length": 431.19140625, "completions/mean_terminated_length": 436.3043518066406, "completions/min_length": 0.0, "completions/min_terminated_length": 155.0, "epoch": 0.21013333333333334, "grad_norm": 0.008852004073560238, "learning_rate": 8.333333333333334e-08, "loss": -0.0215, "num_tokens": 43754719.0, "reward": 1.5589957237243652, "reward_std": 0.557856559753418, "rewards/accuracy_reward_step": 0.44921875, "rewards/final_brier_reward_step": 0.6363070011138916, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.9356131553649902, "step": 197 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5163560666137985, "calib/avg_num_step_conf": 3.64453125, "calib/ece": 0.22762845849802363, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.4031620553359684, "calib/gap": 0.04762688342585242, "calib/mean_conf": 0.7846245059288537, "calib/mu_c": 0.8028846153846153, "calib/mu_w": 0.7552577319587629, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.19782608695652165, "calib/std_conf": 0.2153707101689293, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2137.0, "completions/max_terminated_length": 2137.0, "completions/mean_length": 427.171875, "completions/mean_terminated_length": 430.5354309082031, "completions/min_length": 0.0, "completions/min_terminated_length": 183.0, "epoch": 0.2112, "grad_norm": 0.008308351039886475, "learning_rate": 5.555555555555556e-08, "loss": 0.0106, "num_tokens": 43969459.0, "reward": 1.8177075386047363, "reward_std": 0.4939545691013336, "rewards/accuracy_reward_step": 0.609375, "rewards/final_brier_reward_step": 0.7031598091125488, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.9348583221435547, "step": 198 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6144176180672531, "calib/avg_num_step_conf": 3.52734375, "calib/ece": 0.2922440944881891, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.484251968503937, "calib/gap": 0.08612826751512892, "calib/mean_conf": 0.8039763779527558, "calib/mu_c": 0.8436496350364964, "calib/mu_w": 0.7575213675213675, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.2784251968503938, "calib/std_conf": 0.21471170799990183, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2095.0, "completions/max_terminated_length": 2095.0, "completions/mean_length": 482.859375, "completions/mean_terminated_length": 484.7529602050781, "completions/min_length": 0.0, "completions/min_terminated_length": 183.0, "epoch": 0.21226666666666666, "grad_norm": 0.008521615527570248, "learning_rate": 2.777777777777778e-08, "loss": 0.0147, "num_tokens": 44197271.0, "reward": 1.7006497383117676, "reward_std": 0.5969787836074829, "rewards/accuracy_reward_step": 0.53515625, "rewards/final_brier_reward_step": 0.6729308366775513, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9343558549880981, "step": 199 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6254920634920635, "calib/avg_num_step_conf": 3.41015625, "calib/ece": 0.2550980392156863, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.4823529411764706, "calib/gap": 0.07900952380952386, "calib/mean_conf": 0.7966666666666665, "calib/mu_c": 0.8292, "calib/mu_w": 0.7501904761904762, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.23176470588235298, "calib/std_conf": 0.22270763976670738, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1165.0, "completions/max_terminated_length": 1165.0, "completions/mean_length": 448.09765625, "completions/mean_terminated_length": 449.85491943359375, "completions/min_length": 0.0, "completions/min_terminated_length": 123.0, "epoch": 0.21333333333333335, "grad_norm": 0.008153123781085014, "learning_rate": 0.0, "loss": 0.0175, "num_tokens": 44420032.0, "reward": 1.78486168384552, "reward_std": 0.38065484166145325, "rewards/accuracy_reward_step": 0.5859375, "rewards/final_brier_reward_step": 0.6998906135559082, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9395561218261719, "step": 200 }, { "epoch": 0.21333333333333335, "step": 200, "total_flos": 0.0, "train_loss": 0.03268525441642851, "train_runtime": 11125.5146, "train_samples_per_second": 4.602, "train_steps_per_second": 0.018 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 44420032, "num_train_epochs": 1, "save_steps": 20, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }