{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.21333333333333335, "eval_steps": 500, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "calib/answer_extract_rate": 0.06640625, "calib/auroc": 0.75, "calib/avg_num_step_conf": 0.359375, "calib/ece": 0.5285714285714285, "calib/final_conf_rate": 0.0546875, "calib/format_rate": 0.04296875, "calib/frac_conf_gt_0.9": 0.9285714285714286, "calib/gap": 0.02833333333333321, "calib/mean_conf": 0.9571428571428572, "calib/mu_c": 0.9733333333333333, "calib/mu_w": 0.9450000000000001, "calib/nonempty_final_conf_rate": 0.0546875, "calib/nonempty_reasoning_rate": 0.078125, "calib/nonempty_step_conf_rate": 0.05859375, "calib/pce": 0.5285714285714285, "calib/std_conf": 0.033896601479156206, "calib/step_conf_rate": 0.05859375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 2853.0, "completions/max_terminated_length": 2853.0, "completions/mean_length": 658.8203125, "completions/mean_terminated_length": 714.6525268554688, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0010666666666666667, "grad_norm": 0.09962800145149231, "learning_rate": 2.5000000000000004e-07, "loss": 0.0146, "num_tokens": 276242.0, "reward": 0.052166663110256195, "reward_std": 0.09830024838447571, "rewards/accuracy_reward_step": 0.02734375, "rewards/final_brier_reward_step": 0.024793751537799835, "rewards/format_reward_step": 0.04296875, "rewards/stepwise_brier_reward": 0.03152916580438614, "step": 1 }, { "calib/answer_extract_rate": 0.06640625, "calib/auroc": 0.1851851851851852, "calib/avg_num_step_conf": 0.24609375, "calib/ece": 0.2141666666666665, "calib/final_conf_rate": 0.046875, "calib/format_rate": 0.046875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.01666666666666672, "calib/mean_conf": 0.9641666666666665, "calib/mu_c": 0.9599999999999999, "calib/mu_w": 0.9766666666666666, "calib/nonempty_final_conf_rate": 0.046875, "calib/nonempty_reasoning_rate": 0.06640625, "calib/nonempty_step_conf_rate": 0.0546875, "calib/pce": 0.2141666666666665, "calib/std_conf": 0.014409680388158833, "calib/step_conf_rate": 0.0546875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0859375, "completions/max_length": 2927.0, "completions/max_terminated_length": 2927.0, "completions/mean_length": 749.54296875, "completions/mean_terminated_length": 820.0128784179688, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0021333333333333334, "grad_norm": 0.09382238984107971, "learning_rate": 5.000000000000001e-07, "loss": -0.014, "num_tokens": 571413.0, "reward": 0.06798964738845825, "reward_std": 0.1538330614566803, "rewards/accuracy_reward_step": 0.0390625, "rewards/final_brier_reward_step": 0.03563320264220238, "rewards/format_reward_step": 0.046875, "rewards/stepwise_brier_reward": 0.03320039063692093, "step": 2 }, { "calib/answer_extract_rate": 0.0390625, "calib/auroc": 0.875, "calib/avg_num_step_conf": 0.16015625, "calib/ece": 0.44999999999999996, "calib/final_conf_rate": 0.03125, "calib/format_rate": 0.01953125, "calib/frac_conf_gt_0.9": 0.875, "calib/gap": 0.02499999999999991, "calib/mean_conf": 0.95, "calib/mu_c": 0.9624999999999999, "calib/mu_w": 0.9375, "calib/nonempty_final_conf_rate": 0.03125, "calib/nonempty_reasoning_rate": 0.04296875, "calib/nonempty_step_conf_rate": 0.03125, "calib/pce": 0.44999999999999996, "calib/std_conf": 0.021213203435596413, "calib/step_conf_rate": 0.03125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1171875, "completions/max_length": 2956.0, "completions/max_terminated_length": 2956.0, "completions/mean_length": 637.484375, "completions/mean_terminated_length": 722.106201171875, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0032, "grad_norm": 0.028764434158802032, "learning_rate": 7.5e-07, "loss": -0.0068, "num_tokens": 839865.0, "reward": 0.03087550215423107, "reward_std": 0.07833844423294067, "rewards/accuracy_reward_step": 0.01953125, "rewards/final_brier_reward_step": 0.012537500821053982, "rewards/format_reward_step": 0.01953125, "rewards/stepwise_brier_reward": 0.013308260589838028, "step": 3 }, { "calib/answer_extract_rate": 0.03125, "calib/auroc": 0.7083333333333333, "calib/avg_num_step_conf": 0.17578125, "calib/ece": 0.38142857142857145, "calib/final_conf_rate": 0.02734375, "calib/format_rate": 0.02734375, "calib/frac_conf_gt_0.9": 0.8571428571428571, "calib/gap": 0.022500000000000075, "calib/mean_conf": 0.9528571428571428, "calib/mu_c": 0.9625, "calib/mu_w": 0.94, "calib/nonempty_final_conf_rate": 0.02734375, "calib/nonempty_reasoning_rate": 0.0390625, "calib/nonempty_step_conf_rate": 0.03515625, "calib/pce": 0.38142857142857145, "calib/std_conf": 0.027627256579733875, "calib/step_conf_rate": 0.03515625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1171875, "completions/max_length": 3033.0, "completions/max_terminated_length": 3033.0, "completions/mean_length": 712.1953125, "completions/mean_terminated_length": 806.7344970703125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.004266666666666667, "grad_norm": 0.013823305256664753, "learning_rate": 1.0000000000000002e-06, "loss": -0.0006, "num_tokens": 1128355.0, "reward": 0.030875656753778458, "reward_std": 0.059030190110206604, "rewards/accuracy_reward_step": 0.015625, "rewards/final_brier_reward_step": 0.0169496089220047, "rewards/format_reward_step": 0.02734375, "rewards/stepwise_brier_reward": 0.01670926623046398, "step": 4 }, { "calib/answer_extract_rate": 0.01953125, "calib/avg_num_step_conf": 0.12109375, "calib/ece": 0.914, "calib/final_conf_rate": 0.01953125, "calib/format_rate": 0.015625, "calib/frac_conf_gt_0.9": 0.6, "calib/mean_conf": 0.914, "calib/mu_c": NaN, "calib/mu_w": 0.914, "calib/nonempty_final_conf_rate": 0.01953125, "calib/nonempty_reasoning_rate": 0.0234375, "calib/nonempty_step_conf_rate": 0.01953125, "calib/pce": 0.914, "calib/std_conf": 0.06406246951218784, "calib/step_conf_rate": 0.01953125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1015625, "completions/max_length": 3054.0, "completions/max_terminated_length": 3054.0, "completions/mean_length": 677.8671875, "completions/mean_terminated_length": 754.49560546875, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.005333333333333333, "grad_norm": 0.008904083631932735, "learning_rate": 1.25e-06, "loss": 0.0099, "num_tokens": 1408577.0, "reward": 0.010494021698832512, "reward_std": 0.029681574553251266, "rewards/accuracy_reward_step": 0.00390625, "rewards/final_brier_reward_step": 0.002392577938735485, "rewards/format_reward_step": 0.015625, "rewards/stepwise_brier_reward": 0.008333507925271988, "step": 5 }, { "calib/answer_extract_rate": 0.06640625, "calib/auroc": 0.2727272727272727, "calib/avg_num_step_conf": 0.34765625, "calib/ece": 0.6862222222222225, "calib/final_conf_rate": 0.05859375, "calib/format_rate": 0.0390625, "calib/frac_conf_gt_0.9": 0.9333333333333333, "calib/gap": -0.010757575757575855, "calib/mean_conf": 0.9528888888888891, "calib/mu_c": 0.9450000000000001, "calib/mu_w": 0.9557575757575759, "calib/nonempty_final_conf_rate": 0.05859375, "calib/nonempty_reasoning_rate": 0.0703125, "calib/nonempty_step_conf_rate": 0.0546875, "calib/pce": 0.6862222222222225, "calib/std_conf": 0.0215291935018362, "calib/step_conf_rate": 0.0546875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.06640625, "completions/max_length": 3026.0, "completions/max_terminated_length": 3026.0, "completions/mean_length": 682.63671875, "completions/mean_terminated_length": 731.1924438476562, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0064, "grad_norm": 0.014401497319340706, "learning_rate": 1.5e-06, "loss": -0.0048, "num_tokens": 1689284.0, "reward": 0.03417190909385681, "reward_std": 0.07955536246299744, "rewards/accuracy_reward_step": 0.015625, "rewards/final_brier_reward_step": 0.013880860060453415, "rewards/format_reward_step": 0.0390625, "rewards/stepwise_brier_reward": 0.021244270727038383, "step": 6 }, { "calib/answer_extract_rate": 0.03515625, "calib/auroc": 0.5178571428571429, "calib/avg_num_step_conf": 0.17578125, "calib/ece": 0.6027272727272728, "calib/final_conf_rate": 0.04296875, "calib/format_rate": 0.0234375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0017857142857142794, "calib/mean_conf": 0.9663636363636364, "calib/mu_c": 0.9674999999999999, "calib/mu_w": 0.9657142857142856, "calib/nonempty_final_conf_rate": 0.04296875, "calib/nonempty_reasoning_rate": 0.046875, "calib/nonempty_step_conf_rate": 0.04296875, "calib/pce": 0.6027272727272728, "calib/std_conf": 0.018227216050694037, "calib/step_conf_rate": 0.04296875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 2711.0, "completions/max_terminated_length": 2711.0, "completions/mean_length": 744.921875, "completions/mean_terminated_length": 851.3392944335938, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.007466666666666667, "grad_norm": 0.01843842677772045, "learning_rate": 1.75e-06, "loss": 0.0168, "num_tokens": 1987408.0, "reward": 0.027878936380147934, "reward_std": 0.06056400388479233, "rewards/accuracy_reward_step": 0.015625, "rewards/final_brier_reward_step": 0.012765233404934406, "rewards/format_reward_step": 0.0234375, "rewards/stepwise_brier_reward": 0.012813015840947628, "step": 7 }, { "calib/answer_extract_rate": 0.03125, "calib/avg_num_step_conf": 0.171875, "calib/ece": 0.04500000000000004, "calib/final_conf_rate": 0.015625, "calib/format_rate": 0.015625, "calib/frac_conf_gt_0.9": 1.0, "calib/mean_conf": 0.955, "calib/mu_c": 0.955, "calib/mu_w": NaN, "calib/nonempty_final_conf_rate": 0.015625, "calib/nonempty_reasoning_rate": 0.046875, "calib/nonempty_step_conf_rate": 0.03125, "calib/pce": 0.0, "calib/std_conf": 0.02291287847477918, "calib/step_conf_rate": 0.03125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0859375, "completions/max_length": 3049.0, "completions/max_terminated_length": 3049.0, "completions/mean_length": 779.12890625, "completions/mean_terminated_length": 852.38037109375, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.008533333333333334, "grad_norm": 0.005653747823089361, "learning_rate": 2.0000000000000003e-06, "loss": 0.0054, "num_tokens": 2293377.0, "reward": 0.026519201695919037, "reward_std": 0.07500763237476349, "rewards/accuracy_reward_step": 0.015625, "rewards/final_brier_reward_step": 0.0155851561576128, "rewards/format_reward_step": 0.015625, "rewards/stepwise_brier_reward": 0.012366651557385921, "step": 8 }, { "calib/answer_extract_rate": 0.0390625, "calib/auroc": 0.39999999999999997, "calib/avg_num_step_conf": 0.14453125, "calib/ece": 0.4249999999999998, "calib/final_conf_rate": 0.0390625, "calib/format_rate": 0.01953125, "calib/frac_conf_gt_0.9": 0.8, "calib/gap": 0.04999999999999993, "calib/mean_conf": 0.925, "calib/mu_c": 0.95, "calib/mu_w": 0.9, "calib/nonempty_final_conf_rate": 0.0390625, "calib/nonempty_reasoning_rate": 0.046875, "calib/nonempty_step_conf_rate": 0.02734375, "calib/pce": 0.4249999999999998, "calib/std_conf": 0.11101801655587258, "calib/step_conf_rate": 0.02734375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08203125, "completions/max_length": 2921.0, "completions/max_terminated_length": 2921.0, "completions/mean_length": 681.54296875, "completions/mean_terminated_length": 742.44677734375, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0096, "grad_norm": 0.018278148025274277, "learning_rate": 2.25e-06, "loss": -0.0054, "num_tokens": 2575388.0, "reward": 0.03480459004640579, "reward_std": 0.09339861571788788, "rewards/accuracy_reward_step": 0.0234375, "rewards/final_brier_reward_step": 0.01447617169469595, "rewards/format_reward_step": 0.01953125, "rewards/stepwise_brier_reward": 0.01146093662828207, "step": 9 }, { "calib/answer_extract_rate": 0.04296875, "calib/auroc": 0.40740740740740744, "calib/avg_num_step_conf": 0.23046875, "calib/ece": 0.2466666666666666, "calib/final_conf_rate": 0.046875, "calib/format_rate": 0.0390625, "calib/frac_conf_gt_0.9": 0.9166666666666666, "calib/gap": -0.040000000000000036, "calib/mean_conf": 0.9333333333333332, "calib/mu_c": 0.9233333333333332, "calib/mu_w": 0.9633333333333333, "calib/nonempty_final_conf_rate": 0.046875, "calib/nonempty_reasoning_rate": 0.05078125, "calib/nonempty_step_conf_rate": 0.046875, "calib/pce": 0.21499999999999994, "calib/std_conf": 0.09551032521262935, "calib/step_conf_rate": 0.046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.06640625, "completions/max_length": 3045.0, "completions/max_terminated_length": 3045.0, "completions/mean_length": 769.69921875, "completions/mean_terminated_length": 824.4476928710938, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.010666666666666666, "grad_norm": 0.014670617878437042, "learning_rate": 2.5e-06, "loss": -0.0144, "num_tokens": 2879231.0, "reward": 0.059109218418598175, "reward_std": 0.13718387484550476, "rewards/accuracy_reward_step": 0.03515625, "rewards/final_brier_reward_step": 0.031102342531085014, "rewards/format_reward_step": 0.0390625, "rewards/stepwise_brier_reward": 0.025647033005952835, "step": 10 }, { "calib/answer_extract_rate": 0.05859375, "calib/auroc": 0.6805555555555556, "calib/avg_num_step_conf": 0.2109375, "calib/ece": 0.46791666666666654, "calib/final_conf_rate": 0.046875, "calib/format_rate": 0.03515625, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.01750000000000007, "calib/mean_conf": 0.9679166666666665, "calib/mu_c": 0.9766666666666667, "calib/mu_w": 0.9591666666666666, "calib/nonempty_final_conf_rate": 0.046875, "calib/nonempty_reasoning_rate": 0.05859375, "calib/nonempty_step_conf_rate": 0.0390625, "calib/pce": 0.46791666666666654, "calib/std_conf": 0.022863574863864915, "calib/step_conf_rate": 0.0390625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.06640625, "completions/max_length": 3034.0, "completions/max_terminated_length": 3034.0, "completions/mean_length": 758.05859375, "completions/mean_terminated_length": 811.9790649414062, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.011733333333333333, "grad_norm": 0.010482767596840858, "learning_rate": 2.7500000000000004e-06, "loss": -0.0218, "num_tokens": 3177774.0, "reward": 0.04779710993170738, "reward_std": 0.10256500542163849, "rewards/accuracy_reward_step": 0.02734375, "rewards/final_brier_reward_step": 0.020838573575019836, "rewards/format_reward_step": 0.03515625, "rewards/stepwise_brier_reward": 0.025818627327680588, "step": 11 }, { "calib/answer_extract_rate": 0.0546875, "calib/auroc": 0.19791666666666666, "calib/avg_num_step_conf": 0.31640625, "calib/ece": 0.5385714285714286, "calib/final_conf_rate": 0.0546875, "calib/format_rate": 0.04296875, "calib/frac_conf_gt_0.9": 0.7857142857142857, "calib/gap": -0.0495833333333332, "calib/mean_conf": 0.9400000000000001, "calib/mu_c": 0.9116666666666667, "calib/mu_w": 0.9612499999999999, "calib/nonempty_final_conf_rate": 0.0546875, "calib/nonempty_reasoning_rate": 0.06640625, "calib/nonempty_step_conf_rate": 0.0546875, "calib/pce": 0.525, "calib/std_conf": 0.04503966505838412, "calib/step_conf_rate": 0.0546875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09375, "completions/max_length": 2971.0, "completions/max_terminated_length": 2971.0, "completions/mean_length": 696.68359375, "completions/mean_terminated_length": 768.7543334960938, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0128, "grad_norm": 0.021372651681303978, "learning_rate": 3e-06, "loss": -0.0073, "num_tokens": 3460301.0, "reward": 0.04767921566963196, "reward_std": 0.11198088526725769, "rewards/accuracy_reward_step": 0.0234375, "rewards/final_brier_reward_step": 0.021225782111287117, "rewards/format_reward_step": 0.04296875, "rewards/stepwise_brier_reward": 0.03277232497930527, "step": 12 }, { "calib/answer_extract_rate": 0.0625, "calib/auroc": 0.27777777777777773, "calib/avg_num_step_conf": 0.3125, "calib/ece": 0.33933333333333343, "calib/final_conf_rate": 0.05859375, "calib/format_rate": 0.046875, "calib/frac_conf_gt_0.9": 0.5333333333333333, "calib/gap": 0.06500000000000006, "calib/mean_conf": 0.8740000000000001, "calib/mu_c": 0.8999999999999999, "calib/mu_w": 0.8349999999999999, "calib/nonempty_final_conf_rate": 0.05859375, "calib/nonempty_reasoning_rate": 0.0625, "calib/nonempty_step_conf_rate": 0.05078125, "calib/pce": 0.3066666666666668, "calib/std_conf": 0.2047697894384488, "calib/step_conf_rate": 0.05078125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08203125, "completions/max_length": 3040.0, "completions/max_terminated_length": 3040.0, "completions/mean_length": 729.8984375, "completions/mean_terminated_length": 795.1233520507812, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.013866666666666666, "grad_norm": 0.011653637513518333, "learning_rate": 3.2500000000000002e-06, "loss": -0.0038, "num_tokens": 3751747.0, "reward": 0.06711044907569885, "reward_std": 0.14474359154701233, "rewards/accuracy_reward_step": 0.0390625, "rewards/final_brier_reward_step": 0.03549882769584656, "rewards/format_reward_step": 0.046875, "rewards/stepwise_brier_reward": 0.029817987233400345, "step": 13 }, { "calib/answer_extract_rate": 0.10546875, "calib/auroc": 0.5033333333333333, "calib/avg_num_step_conf": 0.6171875, "calib/ece": 0.3548000000000001, "calib/final_conf_rate": 0.09765625, "calib/format_rate": 0.08984375, "calib/frac_conf_gt_0.9": 0.92, "calib/gap": 0.008000000000000007, "calib/mean_conf": 0.9547999999999999, "calib/mu_c": 0.958, "calib/mu_w": 0.95, "calib/nonempty_final_conf_rate": 0.09765625, "calib/nonempty_reasoning_rate": 0.12890625, "calib/nonempty_step_conf_rate": 0.11328125, "calib/pce": 0.3548000000000001, "calib/std_conf": 0.04271955055943355, "calib/step_conf_rate": 0.11328125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.07421875, "completions/max_length": 2981.0, "completions/max_terminated_length": 2981.0, "completions/mean_length": 748.015625, "completions/mean_terminated_length": 807.9830932617188, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.014933333333333333, "grad_norm": 0.020944004878401756, "learning_rate": 3.5e-06, "loss": -0.0232, "num_tokens": 4048639.0, "reward": 0.11581047624349594, "reward_std": 0.22864821553230286, "rewards/accuracy_reward_step": 0.0625, "rewards/final_brier_reward_step": 0.0616636723279953, "rewards/format_reward_step": 0.08984375, "rewards/stepwise_brier_reward": 0.061734482645988464, "step": 14 }, { "calib/answer_extract_rate": 0.0859375, "calib/auroc": 0.5491071428571429, "calib/avg_num_step_conf": 0.45703125, "calib/ece": 0.5995454545454546, "calib/final_conf_rate": 0.0859375, "calib/format_rate": 0.0625, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.004821428571428421, "calib/mean_conf": 0.9631818181818183, "calib/mu_c": 0.9662499999999999, "calib/mu_w": 0.9614285714285715, "calib/nonempty_final_conf_rate": 0.0859375, "calib/nonempty_reasoning_rate": 0.08984375, "calib/nonempty_step_conf_rate": 0.078125, "calib/pce": 0.5995454545454546, "calib/std_conf": 0.020087617994531227, "calib/step_conf_rate": 0.078125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 2926.0, "completions/max_terminated_length": 2926.0, "completions/mean_length": 708.734375, "completions/mean_terminated_length": 768.796630859375, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.016, "grad_norm": 0.02068154141306877, "learning_rate": 3.7500000000000005e-06, "loss": -0.0043, "num_tokens": 4337955.0, "reward": 0.060002513229846954, "reward_std": 0.13000041246414185, "rewards/accuracy_reward_step": 0.03125, "rewards/final_brier_reward_step": 0.01880195364356041, "rewards/format_reward_step": 0.0625, "rewards/stepwise_brier_reward": 0.033708103001117706, "step": 15 }, { "calib/answer_extract_rate": 0.13671875, "calib/auroc": 0.6628787878787878, "calib/avg_num_step_conf": 0.8046875, "calib/ece": 0.588235294117647, "calib/final_conf_rate": 0.1328125, "calib/format_rate": 0.10546875, "calib/frac_conf_gt_0.9": 0.8529411764705882, "calib/gap": 0.032954545454545126, "calib/mean_conf": 0.9411764705882353, "calib/mu_c": 0.9624999999999998, "calib/mu_w": 0.9295454545454547, "calib/nonempty_final_conf_rate": 0.1328125, "calib/nonempty_reasoning_rate": 0.15625, "calib/nonempty_step_conf_rate": 0.13671875, "calib/pce": 0.588235294117647, "calib/std_conf": 0.04824963918684676, "calib/step_conf_rate": 0.13671875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05859375, "completions/max_length": 3010.0, "completions/max_terminated_length": 3010.0, "completions/mean_length": 840.05078125, "completions/mean_terminated_length": 892.3361206054688, "completions/min_length": 0.0, "completions/min_terminated_length": 6.0, "epoch": 0.017066666666666667, "grad_norm": 0.018788030371069908, "learning_rate": 4.000000000000001e-06, "loss": 0.0434, "num_tokens": 4661856.0, "reward": 0.10886503756046295, "reward_std": 0.1589367389678955, "rewards/accuracy_reward_step": 0.0546875, "rewards/final_brier_reward_step": 0.046876952052116394, "rewards/format_reward_step": 0.10546875, "rewards/stepwise_brier_reward": 0.06436442583799362, "step": 16 }, { "calib/answer_extract_rate": 0.234375, "calib/auroc": 0.541501976284585, "calib/avg_num_step_conf": 1.1875, "calib/ece": 0.35812499999999997, "calib/final_conf_rate": 0.21875, "calib/format_rate": 0.17578125, "calib/frac_conf_gt_0.9": 0.8392857142857143, "calib/gap": 0.0022463768115943417, "calib/mean_conf": 0.9474107142857143, "calib/mu_c": 0.9483333333333334, "calib/mu_w": 0.946086956521739, "calib/nonempty_final_conf_rate": 0.21875, "calib/nonempty_reasoning_rate": 0.2421875, "calib/nonempty_step_conf_rate": 0.21875, "calib/pce": 0.35812499999999997, "calib/std_conf": 0.05055929220292965, "calib/step_conf_rate": 0.21875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 3054.0, "completions/max_terminated_length": 3054.0, "completions/mean_length": 639.2421875, "completions/mean_terminated_length": 693.415283203125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.018133333333333335, "grad_norm": 0.03104730136692524, "learning_rate": 4.25e-06, "loss": -0.0448, "num_tokens": 4929030.0, "reward": 0.24797993898391724, "reward_std": 0.4323830306529999, "rewards/accuracy_reward_step": 0.140625, "rewards/final_brier_reward_step": 0.11929208785295486, "rewards/format_reward_step": 0.17578125, "rewards/stepwise_brier_reward": 0.1343463659286499, "step": 17 }, { "calib/answer_extract_rate": 0.30859375, "calib/auroc": 0.5597545219638242, "calib/avg_num_step_conf": 1.859375, "calib/ece": 0.4956835443037975, "calib/final_conf_rate": 0.30859375, "calib/format_rate": 0.23828125, "calib/frac_conf_gt_0.9": 0.8354430379746836, "calib/gap": 0.012775193798449602, "calib/mean_conf": 0.9513797468354431, "calib/mu_c": 0.9583333333333331, "calib/mu_w": 0.9455581395348835, "calib/nonempty_final_conf_rate": 0.30859375, "calib/nonempty_reasoning_rate": 0.34375, "calib/nonempty_step_conf_rate": 0.2890625, "calib/pce": 0.4956835443037975, "calib/std_conf": 0.038294430238321596, "calib/step_conf_rate": 0.2890625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.04296875, "completions/max_length": 2948.0, "completions/max_terminated_length": 2948.0, "completions/mean_length": 708.203125, "completions/mean_terminated_length": 739.9999389648438, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.0192, "grad_norm": 0.02486121654510498, "learning_rate": 4.5e-06, "loss": 0.003, "num_tokens": 5221050.0, "reward": 0.2707511782646179, "reward_std": 0.30285897850990295, "rewards/accuracy_reward_step": 0.14453125, "rewards/final_brier_reward_step": 0.11842303723096848, "rewards/format_reward_step": 0.23828125, "rewards/stepwise_brier_reward": 0.14817550778388977, "step": 18 }, { "calib/answer_extract_rate": 0.56640625, "calib/auroc": 0.48137860082304523, "calib/avg_num_step_conf": 3.55078125, "calib/ece": 0.5414184397163121, "calib/final_conf_rate": 0.55078125, "calib/format_rate": 0.46484375, "calib/frac_conf_gt_0.9": 0.9361702127659575, "calib/gap": -0.0010864197530864317, "calib/mean_conf": 0.9592907801418439, "calib/mu_c": 0.9586666666666666, "calib/mu_w": 0.959753086419753, "calib/nonempty_final_conf_rate": 0.55078125, "calib/nonempty_reasoning_rate": 0.62890625, "calib/nonempty_step_conf_rate": 0.58203125, "calib/pce": 0.5375886524822695, "calib/std_conf": 0.04203418720181966, "calib/step_conf_rate": 0.58203125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05078125, "completions/max_length": 2673.0, "completions/max_terminated_length": 2673.0, "completions/mean_length": 541.9296875, "completions/mean_terminated_length": 570.9218139648438, "completions/min_length": 0.0, "completions/min_terminated_length": 5.0, "epoch": 0.020266666666666665, "grad_norm": 0.03312524035573006, "learning_rate": 4.75e-06, "loss": -0.0417, "num_tokens": 5464544.0, "reward": 0.5150432586669922, "reward_std": 0.4770926237106323, "rewards/accuracy_reward_step": 0.2578125, "rewards/final_brier_reward_step": 0.23953086137771606, "rewards/format_reward_step": 0.46484375, "rewards/stepwise_brier_reward": 0.3245484232902527, "step": 19 }, { "calib/answer_extract_rate": 0.81640625, "calib/auroc": 0.45312500000000006, "calib/avg_num_step_conf": 5.14453125, "calib/ece": 0.49648437500000003, "calib/final_conf_rate": 0.75, "calib/format_rate": 0.6796875, "calib/frac_conf_gt_0.9": 0.9010416666666666, "calib/gap": -0.003020104895104847, "calib/mean_conf": 0.9548177083333332, "calib/mu_c": 0.9531818181818181, "calib/mu_w": 0.956201923076923, "calib/nonempty_final_conf_rate": 0.75, "calib/nonempty_reasoning_rate": 0.8828125, "calib/nonempty_step_conf_rate": 0.83203125, "calib/pce": 0.49648437500000003, "calib/std_conf": 0.035529376034697406, "calib/step_conf_rate": 0.83203125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2859.0, "completions/max_terminated_length": 2859.0, "completions/mean_length": 480.49609375, "completions/mean_terminated_length": 486.1936950683594, "completions/min_length": 0.0, "completions/min_terminated_length": 25.0, "epoch": 0.021333333333333333, "grad_norm": 0.04117002710700035, "learning_rate": 5e-06, "loss": 0.0026, "num_tokens": 5692423.0, "reward": 0.740427553653717, "reward_std": 0.5722657442092896, "rewards/accuracy_reward_step": 0.36328125, "rewards/final_brier_reward_step": 0.35688430070877075, "rewards/format_reward_step": 0.6796875, "rewards/stepwise_brier_reward": 0.4720134139060974, "step": 20 }, { "calib/answer_extract_rate": 0.88671875, "calib/auroc": 0.5746460746460745, "calib/avg_num_step_conf": 6.00390625, "calib/ece": 0.45457399103139007, "calib/final_conf_rate": 0.87109375, "calib/format_rate": 0.8203125, "calib/frac_conf_gt_0.9": 0.8878923766816144, "calib/gap": 0.0023069498069497874, "calib/mean_conf": 0.9527802690582959, "calib/mu_c": 0.9539285714285715, "calib/mu_w": 0.9516216216216217, "calib/nonempty_final_conf_rate": 0.87109375, "calib/nonempty_reasoning_rate": 0.9453125, "calib/nonempty_step_conf_rate": 0.9296875, "calib/pce": 0.45255605381165914, "calib/std_conf": 0.04369052565733906, "calib/step_conf_rate": 0.9296875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 1702.0, "completions/max_terminated_length": 1702.0, "completions/mean_length": 401.640625, "completions/mean_terminated_length": 406.4031677246094, "completions/min_length": 0.0, "completions/min_terminated_length": 9.0, "epoch": 0.0224, "grad_norm": 0.023674216121435165, "learning_rate": 4.9722222222222224e-06, "loss": -0.0015, "num_tokens": 5898203.0, "reward": 0.9327347278594971, "reward_std": 0.6169314980506897, "rewards/accuracy_reward_step": 0.4609375, "rewards/final_brier_reward_step": 0.46421563625335693, "rewards/format_reward_step": 0.8203125, "rewards/stepwise_brier_reward": 0.6026610136032104, "step": 21 }, { "calib/answer_extract_rate": 0.9375, "calib/auroc": 0.4727504478434615, "calib/avg_num_step_conf": 5.98046875, "calib/ece": 0.4452863070539419, "calib/final_conf_rate": 0.94140625, "calib/format_rate": 0.875, "calib/frac_conf_gt_0.9": 0.8589211618257261, "calib/gap": -0.00298077718065326, "calib/mean_conf": 0.9486058091286306, "calib/mu_c": 0.9471463414634147, "calib/mu_w": 0.950127118644068, "calib/nonempty_final_conf_rate": 0.94140625, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.9453125, "calib/pce": 0.44175933609958506, "calib/std_conf": 0.05277831001223703, "calib/step_conf_rate": 0.9453125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2594.0, "completions/max_terminated_length": 2594.0, "completions/mean_length": 394.5703125, "completions/mean_terminated_length": 396.11767578125, "completions/min_length": 0.0, "completions/min_terminated_length": 21.0, "epoch": 0.023466666666666667, "grad_norm": 0.021677304059267044, "learning_rate": 4.944444444444445e-06, "loss": 0.0094, "num_tokens": 6101029.0, "reward": 0.9807651042938232, "reward_std": 0.5260076522827148, "rewards/accuracy_reward_step": 0.484375, "rewards/final_brier_reward_step": 0.49311840534210205, "rewards/format_reward_step": 0.875, "rewards/stepwise_brier_reward": 0.6174420118331909, "step": 22 }, { "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.4372354497354497, "calib/avg_num_step_conf": 6.13671875, "calib/ece": 0.4589207317073172, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.8536585365853658, "calib/gap": -0.007914682539682305, "calib/mean_conf": 0.9385955284552845, "calib/mu_c": 0.9345416666666667, "calib/mu_w": 0.942456349206349, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.4548556910569107, "calib/std_conf": 0.08487867968199159, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2111.0, "completions/max_terminated_length": 2111.0, "completions/mean_length": 395.18359375, "completions/mean_terminated_length": 398.2952880859375, "completions/min_length": 0.0, "completions/min_terminated_length": 24.0, "epoch": 0.024533333333333334, "grad_norm": 0.03677528351545334, "learning_rate": 4.9166666666666665e-06, "loss": -0.0033, "num_tokens": 6306132.0, "reward": 0.9975969791412354, "reward_std": 0.5517216324806213, "rewards/accuracy_reward_step": 0.46875, "rewards/final_brier_reward_step": 0.509886622428894, "rewards/format_reward_step": 0.94140625, "rewards/stepwise_brier_reward": 0.6640949249267578, "step": 23 }, { "calib/answer_extract_rate": 0.95703125, "calib/auroc": 0.5028132992327365, "calib/avg_num_step_conf": 6.7109375, "calib/ece": 0.5971544715447153, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.93359375, "calib/frac_conf_gt_0.9": 0.8780487804878049, "calib/gap": 0.006686152721958494, "calib/mean_conf": 0.9426829268292682, "calib/mu_c": 0.947058823529412, "calib/mu_w": 0.9403726708074535, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.5971544715447153, "calib/std_conf": 0.05813599378653031, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2547.0, "completions/max_terminated_length": 2547.0, "completions/mean_length": 427.65625, "completions/mean_terminated_length": 432.727294921875, "completions/min_length": 0.0, "completions/min_terminated_length": 58.0, "epoch": 0.0256, "grad_norm": 0.06392373144626617, "learning_rate": 4.888888888888889e-06, "loss": -0.0143, "num_tokens": 6520124.0, "reward": 0.8158930540084839, "reward_std": 0.4685792326927185, "rewards/accuracy_reward_step": 0.33203125, "rewards/final_brier_reward_step": 0.39301759004592896, "rewards/format_reward_step": 0.93359375, "rewards/stepwise_brier_reward": 0.6088360548019409, "step": 24 }, { "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5303343350864012, "calib/avg_num_step_conf": 6.53125, "calib/ece": 0.4575889328063241, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.8537549407114624, "calib/gap": 0.015082644628099273, "calib/mean_conf": 0.9358498023715415, "calib/mu_c": 0.9437190082644629, "calib/mu_w": 0.9286363636363636, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.4575889328063241, "calib/std_conf": 0.08470694173573859, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 1367.0, "completions/max_terminated_length": 1367.0, "completions/mean_length": 383.14453125, "completions/mean_terminated_length": 387.6877746582031, "completions/min_length": 0.0, "completions/min_terminated_length": 24.0, "epoch": 0.02666666666666667, "grad_norm": 0.014866037294268608, "learning_rate": 4.861111111111111e-06, "loss": -0.0125, "num_tokens": 6721433.0, "reward": 1.012782096862793, "reward_std": 0.46653372049331665, "rewards/accuracy_reward_step": 0.47265625, "rewards/final_brier_reward_step": 0.524861752986908, "rewards/format_reward_step": 0.95703125, "rewards/stepwise_brier_reward": 0.678610622882843, "step": 25 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5942113442113443, "calib/avg_num_step_conf": 6.23046875, "calib/ece": 0.3894377510040163, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.7028112449799196, "calib/gap": 0.026897824397824288, "calib/mean_conf": 0.9195582329317268, "calib/mu_c": 0.9321969696969696, "calib/mu_w": 0.9052991452991453, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.3894377510040163, "calib/std_conf": 0.061109270937132205, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2187.0, "completions/max_terminated_length": 2187.0, "completions/mean_length": 398.90625, "completions/mean_terminated_length": 403.6363830566406, "completions/min_length": 0.0, "completions/min_terminated_length": 34.0, "epoch": 0.027733333333333332, "grad_norm": 0.01873624511063099, "learning_rate": 4.833333333333333e-06, "loss": 0.0006, "num_tokens": 6928793.0, "reward": 1.0847468376159668, "reward_std": 0.46095460653305054, "rewards/accuracy_reward_step": 0.51953125, "rewards/final_brier_reward_step": 0.5918804407119751, "rewards/format_reward_step": 0.96875, "rewards/stepwise_brier_reward": 0.7002320289611816, "step": 26 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.47764383212144407, "calib/avg_num_step_conf": 6.6015625, "calib/ece": 0.3622310756972112, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.5617529880478087, "calib/gap": 0.00357315984181672, "calib/mean_conf": 0.8960956175298805, "calib/mu_c": 0.8977611940298507, "calib/mu_w": 0.8941880341880339, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3622310756972112, "calib/std_conf": 0.0760471049549489, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1100.0, "completions/max_terminated_length": 1100.0, "completions/mean_length": 398.19140625, "completions/mean_terminated_length": 404.5119323730469, "completions/min_length": 0.0, "completions/min_terminated_length": 170.0, "epoch": 0.0288, "grad_norm": 0.029240785166621208, "learning_rate": 4.805555555555556e-06, "loss": -0.0061, "num_tokens": 7135946.0, "reward": 1.097345232963562, "reward_std": 0.4886489510536194, "rewards/accuracy_reward_step": 0.52734375, "rewards/final_brier_reward_step": 0.6035945415496826, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.6998488306999207, "step": 27 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5475378663694184, "calib/avg_num_step_conf": 6.08203125, "calib/ece": 0.2791269841269841, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.4365079365079365, "calib/gap": 0.015394400367188976, "calib/mean_conf": 0.8783333333333332, "calib/mu_c": 0.8845033112582782, "calib/mu_w": 0.8691089108910892, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.2791269841269841, "calib/std_conf": 0.08005206639015218, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2515.0, "completions/max_terminated_length": 2515.0, "completions/mean_length": 410.1328125, "completions/mean_terminated_length": 414.9960632324219, "completions/min_length": 0.0, "completions/min_terminated_length": 125.0, "epoch": 0.029866666666666666, "grad_norm": 0.023800725117325783, "learning_rate": 4.777777777777778e-06, "loss": 0.0182, "num_tokens": 7347884.0, "reward": 1.195488691329956, "reward_std": 0.3852658271789551, "rewards/accuracy_reward_step": 0.58984375, "rewards/final_brier_reward_step": 0.6708390712738037, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.7712717652320862, "step": 28 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.4780607061214122, "calib/avg_num_step_conf": 6.671875, "calib/ece": 0.3634661354581673, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.3545816733067729, "calib/gap": 0.010220345440690615, "calib/mean_conf": 0.8574900398406375, "calib/mu_c": 0.8626612903225805, "calib/mu_w": 0.8524409448818899, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.3634661354581673, "calib/std_conf": 0.0959813794420564, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2436.0, "completions/max_terminated_length": 2436.0, "completions/mean_length": 460.5390625, "completions/mean_terminated_length": 462.3451232910156, "completions/min_length": 0.0, "completions/min_terminated_length": 103.0, "epoch": 0.030933333333333334, "grad_norm": 0.016723180189728737, "learning_rate": 4.75e-06, "loss": 0.0171, "num_tokens": 7572910.0, "reward": 1.0748692750930786, "reward_std": 0.3741965889930725, "rewards/accuracy_reward_step": 0.48828125, "rewards/final_brier_reward_step": 0.6018363237380981, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.7640469670295715, "step": 29 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5083301779628928, "calib/avg_num_step_conf": 6.515625, "calib/ece": 0.27711462450592883, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.22134387351778656, "calib/gap": 0.003951154865581286, "calib/mean_conf": 0.819802371541502, "calib/mu_c": 0.8215827338129497, "calib/mu_w": 0.8176315789473684, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.27375494071146245, "calib/std_conf": 0.1048034131571847, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2495.0, "completions/max_terminated_length": 2495.0, "completions/mean_length": 447.73046875, "completions/mean_terminated_length": 453.03955078125, "completions/min_length": 0.0, "completions/min_terminated_length": 149.0, "epoch": 0.032, "grad_norm": 0.05115363001823425, "learning_rate": 4.722222222222222e-06, "loss": -0.0194, "num_tokens": 7794513.0, "reward": 1.1473350524902344, "reward_std": 0.40493902564048767, "rewards/accuracy_reward_step": 0.54296875, "rewards/final_brier_reward_step": 0.6624449491500854, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.7667391896247864, "step": 30 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5041946308724832, "calib/avg_num_step_conf": 5.8828125, "calib/ece": 0.36766798418972324, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.1067193675889328, "calib/gap": 0.0031272586473929387, "calib/mean_conf": 0.7787351778656125, "calib/mu_c": 0.7805769230769232, "calib/mu_w": 0.7774496644295302, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.36766798418972324, "calib/std_conf": 0.11215100028659829, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2527.0, "completions/max_terminated_length": 2527.0, "completions/mean_length": 429.30859375, "completions/mean_terminated_length": 432.68896484375, "completions/min_length": 0.0, "completions/min_terminated_length": 165.0, "epoch": 0.03306666666666667, "grad_norm": 0.02748139202594757, "learning_rate": 4.694444444444445e-06, "loss": -0.0099, "num_tokens": 8010328.0, "reward": 0.9985453486442566, "reward_std": 0.3782951533794403, "rewards/accuracy_reward_step": 0.40625, "rewards/final_brier_reward_step": 0.6011323928833008, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.791486382484436, "step": 31 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5489411475006156, "calib/avg_num_step_conf": 5.6796875, "calib/ece": 0.22027450980392155, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.07058823529411765, "calib/gap": 0.02243105146515656, "calib/mean_conf": 0.730313725490196, "calib/mu_c": 0.7412213740458016, "calib/mu_w": 0.718790322580645, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.21843137254901956, "calib/std_conf": 0.12274595000644, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 3058.0, "completions/max_terminated_length": 3058.0, "completions/mean_length": 396.60546875, "completions/mean_terminated_length": 401.3083190917969, "completions/min_length": 0.0, "completions/min_terminated_length": 139.0, "epoch": 0.034133333333333335, "grad_norm": 0.021270155906677246, "learning_rate": 4.666666666666667e-06, "loss": -0.0006, "num_tokens": 8218563.0, "reward": 1.141471028327942, "reward_std": 0.31708455085754395, "rewards/accuracy_reward_step": 0.51171875, "rewards/final_brier_reward_step": 0.6941863298416138, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8326352834701538, "step": 32 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5461218497675556, "calib/avg_num_step_conf": 5.86328125, "calib/ece": 0.18972656250000006, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0390625, "calib/gap": 0.01911304135062386, "calib/mean_conf": 0.6957421875, "calib/mu_c": 0.7048507462686567, "calib/mu_w": 0.6857377049180329, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.18101562500000007, "calib/std_conf": 0.1369400385486832, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1026.0, "completions/max_terminated_length": 1026.0, "completions/mean_length": 391.53515625, "completions/mean_terminated_length": 397.7500305175781, "completions/min_length": 0.0, "completions/min_terminated_length": 173.0, "epoch": 0.0352, "grad_norm": 0.023767782375216484, "learning_rate": 4.638888888888889e-06, "loss": 0.0194, "num_tokens": 8425668.0, "reward": 1.1614985466003418, "reward_std": 0.3436848223209381, "rewards/accuracy_reward_step": 0.5234375, "rewards/final_brier_reward_step": 0.7054198980331421, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8546367883682251, "step": 33 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.49464104515474383, "calib/avg_num_step_conf": 5.50390625, "calib/ece": 0.10649606299212601, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.011811023622047244, "calib/gap": -0.0037962962962961866, "calib/mean_conf": 0.6516141732283465, "calib/mu_c": 0.65, "calib/mu_w": 0.6537962962962962, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.09165354330708665, "calib/std_conf": 0.13529841891657243, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 979.0, "completions/max_terminated_length": 979.0, "completions/mean_length": 369.26171875, "completions/mean_terminated_length": 375.123046875, "completions/min_length": 0.0, "completions/min_terminated_length": 113.0, "epoch": 0.03626666666666667, "grad_norm": 0.029675081372261047, "learning_rate": 4.611111111111112e-06, "loss": -0.0077, "num_tokens": 8625311.0, "reward": 1.2158899307250977, "reward_std": 0.42698150873184204, "rewards/accuracy_reward_step": 0.5703125, "rewards/final_brier_reward_step": 0.723834753036499, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8662872314453125, "step": 34 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.4940976454737922, "calib/avg_num_step_conf": 5.71484375, "calib/ece": 0.10698412698412701, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.015873015873015872, "calib/gap": -0.0014691730288060034, "calib/mean_conf": 0.6376984126984127, "calib/mu_c": 0.637062937062937, "calib/mu_w": 0.638532110091743, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.08861111111111115, "calib/std_conf": 0.1356417006302318, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2663.0, "completions/max_terminated_length": 2663.0, "completions/mean_length": 478.87109375, "completions/mean_terminated_length": 486.4722595214844, "completions/min_length": 0.0, "completions/min_terminated_length": 225.0, "epoch": 0.037333333333333336, "grad_norm": 0.013189886696636677, "learning_rate": 4.583333333333333e-06, "loss": -0.0116, "num_tokens": 8857158.0, "reward": 1.1923422813415527, "reward_std": 0.3462263345718384, "rewards/accuracy_reward_step": 0.55859375, "rewards/final_brier_reward_step": 0.7173745632171631, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.8371508121490479, "step": 35 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5412643924765788, "calib/avg_num_step_conf": 5.37109375, "calib/ece": 0.059414062499999976, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.00390625, "calib/gap": 0.012688979475076967, "calib/mean_conf": 0.6500390625000001, "calib/mu_c": 0.653954802259887, "calib/mu_w": 0.64126582278481, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.009023437499999992, "calib/std_conf": 0.10886308074421325, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1232.0, "completions/max_terminated_length": 1232.0, "completions/mean_length": 392.64453125, "completions/mean_terminated_length": 398.87701416015625, "completions/min_length": 0.0, "completions/min_terminated_length": 160.0, "epoch": 0.0384, "grad_norm": 0.02006423845887184, "learning_rate": 4.555555555555556e-06, "loss": -0.0094, "num_tokens": 9060387.0, "reward": 1.3527624607086182, "reward_std": 0.300744891166687, "rewards/accuracy_reward_step": 0.69140625, "rewards/final_brier_reward_step": 0.7755589485168457, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.8737719058990479, "step": 36 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5724043715846995, "calib/avg_num_step_conf": 5.69140625, "calib/ece": 0.21741035856573704, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0199203187250996, "calib/gap": 0.027217562587368183, "calib/mean_conf": 0.6833067729083665, "calib/mu_c": 0.6972950819672131, "calib/mu_w": 0.6700775193798449, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.20733067729083665, "calib/std_conf": 0.1180106582141447, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2652.0, "completions/max_terminated_length": 2652.0, "completions/mean_length": 466.546875, "completions/mean_terminated_length": 470.220458984375, "completions/min_length": 0.0, "completions/min_terminated_length": 185.0, "epoch": 0.039466666666666664, "grad_norm": 0.03178872913122177, "learning_rate": 4.527777777777778e-06, "loss": 0.0179, "num_tokens": 9286919.0, "reward": 1.1057353019714355, "reward_std": 0.32003992795944214, "rewards/accuracy_reward_step": 0.4765625, "rewards/final_brier_reward_step": 0.6970722675323486, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.8391504287719727, "step": 37 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5510478573662808, "calib/avg_num_step_conf": 5.5390625, "calib/ece": 0.20070866141732283, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.04330708661417323, "calib/gap": 0.016439161714106865, "calib/mean_conf": 0.7479527559055119, "calib/mu_c": 0.7553956834532374, "calib/mu_w": 0.7389565217391305, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.20070866141732283, "calib/std_conf": 0.09473825178180512, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 3000.0, "completions/max_terminated_length": 3000.0, "completions/mean_length": 458.640625, "completions/mean_terminated_length": 464.0790710449219, "completions/min_length": 0.0, "completions/min_terminated_length": 181.0, "epoch": 0.04053333333333333, "grad_norm": 0.014871971681714058, "learning_rate": 4.5e-06, "loss": -0.0138, "num_tokens": 9511219.0, "reward": 1.1796945333480835, "reward_std": 0.33943718671798706, "rewards/accuracy_reward_step": 0.54296875, "rewards/final_brier_reward_step": 0.7055633068084717, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8491523861885071, "step": 38 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5668404393967603, "calib/avg_num_step_conf": 5.62890625, "calib/ece": 0.29496062992125976, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.1220472440944882, "calib/gap": 0.018241171724694527, "calib/mean_conf": 0.8107086614173228, "calib/mu_c": 0.8195419847328245, "calib/mu_w": 0.8013008130081299, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.29496062992125976, "calib/std_conf": 0.08018365830291181, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2013.0, "completions/max_terminated_length": 2013.0, "completions/mean_length": 428.390625, "completions/mean_terminated_length": 433.4703674316406, "completions/min_length": 0.0, "completions/min_terminated_length": 192.0, "epoch": 0.0416, "grad_norm": 0.028158774599432945, "learning_rate": 4.472222222222223e-06, "loss": 0.004, "num_tokens": 9726975.0, "reward": 1.13348388671875, "reward_std": 0.33712756633758545, "rewards/accuracy_reward_step": 0.51171875, "rewards/final_brier_reward_step": 0.660125732421875, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.8386534452438354, "step": 39 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.49153605015673985, "calib/avg_num_step_conf": 5.515625, "calib/ece": 0.41933333333333345, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.25098039215686274, "calib/gap": 0.003366771159874382, "calib/mean_conf": 0.8471764705882353, "calib/mu_c": 0.8490909090909088, "calib/mu_w": 0.8457241379310344, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.41756862745098056, "calib/std_conf": 0.07143259701953315, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2527.0, "completions/max_terminated_length": 2527.0, "completions/mean_length": 466.23046875, "completions/mean_terminated_length": 471.7589111328125, "completions/min_length": 0.0, "completions/min_terminated_length": 160.0, "epoch": 0.042666666666666665, "grad_norm": 0.02498670481145382, "learning_rate": 4.444444444444444e-06, "loss": -0.0088, "num_tokens": 9953090.0, "reward": 1.0184870958328247, "reward_std": 0.3680182695388794, "rewards/accuracy_reward_step": 0.4296875, "rewards/final_brier_reward_step": 0.5761066675186157, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.7829978466033936, "step": 40 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.46162923177083337, "calib/avg_num_step_conf": 5.61328125, "calib/ece": 0.11953125000000012, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.33203125, "calib/gap": -0.0050000000000000044, "calib/mean_conf": 0.86953125, "calib/mu_c": 0.86828125, "calib/mu_w": 0.87328125, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.11953125000000012, "calib/std_conf": 0.05990043007723317, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 756.0, "completions/max_terminated_length": 756.0, "completions/mean_length": 398.42578125, "completions/mean_terminated_length": 404.7500305175781, "completions/min_length": 0.0, "completions/min_terminated_length": 172.0, "epoch": 0.04373333333333333, "grad_norm": 0.043474648147821426, "learning_rate": 4.416666666666667e-06, "loss": 0.0036, "num_tokens": 10162335.0, "reward": 1.4100008010864258, "reward_std": 0.3171630799770355, "rewards/accuracy_reward_step": 0.75, "rewards/final_brier_reward_step": 0.7927491664886475, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.8472540974617004, "step": 41 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5286971368531652, "calib/avg_num_step_conf": 5.84375, "calib/ece": 0.2522265625000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.42578125, "calib/gap": 0.00930785395324385, "calib/mean_conf": 0.8850390625, "calib/mu_c": 0.8884567901234568, "calib/mu_w": 0.879148936170213, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2522265625000001, "calib/std_conf": 0.054450364774913065, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 826.0, "completions/max_terminated_length": 826.0, "completions/mean_length": 372.28515625, "completions/mean_terminated_length": 378.1944580078125, "completions/min_length": 0.0, "completions/min_terminated_length": 193.0, "epoch": 0.0448, "grad_norm": 0.021088628098368645, "learning_rate": 4.388888888888889e-06, "loss": -0.0122, "num_tokens": 10362008.0, "reward": 1.2652591466903687, "reward_std": 0.24596109986305237, "rewards/accuracy_reward_step": 0.6328125, "rewards/final_brier_reward_step": 0.7053816318511963, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.8244049549102783, "step": 42 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6187326782564877, "calib/avg_num_step_conf": 5.68359375, "calib/ece": 0.281372549019608, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.2196078431372549, "calib/gap": 0.029667422524565445, "calib/mean_conf": 0.8578431372549019, "calib/mu_c": 0.8704081632653061, "calib/mu_w": 0.8407407407407407, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.281372549019608, "calib/std_conf": 0.06827607525995273, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 994.0, "completions/max_terminated_length": 994.0, "completions/mean_length": 408.015625, "completions/mean_terminated_length": 414.4920959472656, "completions/min_length": 0.0, "completions/min_terminated_length": 131.0, "epoch": 0.04586666666666667, "grad_norm": 0.021807372570037842, "learning_rate": 4.361111111111112e-06, "loss": -0.0035, "num_tokens": 10571684.0, "reward": 1.1950891017913818, "reward_std": 0.3704569339752197, "rewards/accuracy_reward_step": 0.57421875, "rewards/final_brier_reward_step": 0.6838207244873047, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.8035672903060913, "step": 43 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5736035925196852, "calib/avg_num_step_conf": 5.66796875, "calib/ece": 0.35470588235294115, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.24313725490196078, "calib/gap": 0.021124507874015763, "calib/mean_conf": 0.8566666666666666, "calib/mu_c": 0.8671875, "calib/mu_w": 0.8460629921259842, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.35470588235294115, "calib/std_conf": 0.06881822224104821, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2759.0, "completions/max_terminated_length": 2759.0, "completions/mean_length": 448.39453125, "completions/mean_terminated_length": 453.71148681640625, "completions/min_length": 0.0, "completions/min_terminated_length": 183.0, "epoch": 0.046933333333333334, "grad_norm": 0.1967300921678543, "learning_rate": 4.333333333333334e-06, "loss": -0.0194, "num_tokens": 10792793.0, "reward": 1.1023677587509155, "reward_std": 0.35060185194015503, "rewards/accuracy_reward_step": 0.5, "rewards/final_brier_reward_step": 0.6275527477264404, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.785824716091156, "step": 44 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5842805476621028, "calib/avg_num_step_conf": 5.6328125, "calib/ece": 0.22464843750000002, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.1015625, "calib/gap": 0.0259558253681218, "calib/mean_conf": 0.8418359375, "calib/mu_c": 0.8517721518987341, "calib/mu_w": 0.8258163265306123, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.22464843750000002, "calib/std_conf": 0.06964298211949352, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1029.0, "completions/max_terminated_length": 1029.0, "completions/mean_length": 426.50390625, "completions/mean_terminated_length": 433.2738342285156, "completions/min_length": 0.0, "completions/min_terminated_length": 167.0, "epoch": 0.048, "grad_norm": 0.01614511013031006, "learning_rate": 4.305555555555556e-06, "loss": 0.0006, "num_tokens": 11007026.0, "reward": 1.2553672790527344, "reward_std": 0.36957302689552307, "rewards/accuracy_reward_step": 0.6171875, "rewards/final_brier_reward_step": 0.720680832862854, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.8320383429527283, "step": 45 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.538738240177089, "calib/avg_num_step_conf": 5.8359375, "calib/ece": 0.298671875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0703125, "calib/gap": 0.00506794564348545, "calib/mean_conf": 0.8416406250000001, "calib/mu_c": 0.8439568345323741, "calib/mu_w": 0.8388888888888887, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.298671875, "calib/std_conf": 0.05026923362862592, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1568.0, "completions/max_terminated_length": 1568.0, "completions/mean_length": 458.9609375, "completions/mean_terminated_length": 466.2460632324219, "completions/min_length": 0.0, "completions/min_terminated_length": 160.0, "epoch": 0.04906666666666667, "grad_norm": 0.02280016988515854, "learning_rate": 4.277777777777778e-06, "loss": -0.0062, "num_tokens": 11229288.0, "reward": 1.162341833114624, "reward_std": 0.3003543019294739, "rewards/accuracy_reward_step": 0.54296875, "rewards/final_brier_reward_step": 0.6626296639442444, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.8148629665374756, "step": 46 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6155878898603679, "calib/avg_num_step_conf": 5.6484375, "calib/ece": 0.20439215686274517, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.050980392156862744, "calib/gap": 0.0209539344904085, "calib/mean_conf": 0.8177254901960784, "calib/mu_c": 0.8256962025316454, "calib/mu_w": 0.8047422680412369, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.20125490196078438, "calib/std_conf": 0.0747723547611309, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 3033.0, "completions/max_terminated_length": 3033.0, "completions/mean_length": 512.859375, "completions/mean_terminated_length": 518.9407348632812, "completions/min_length": 0.0, "completions/min_terminated_length": 205.0, "epoch": 0.050133333333333335, "grad_norm": 0.019669126719236374, "learning_rate": 4.25e-06, "loss": 0.0083, "num_tokens": 11466556.0, "reward": 1.2581654787063599, "reward_std": 0.2482684850692749, "rewards/accuracy_reward_step": 0.6171875, "rewards/final_brier_reward_step": 0.7264930009841919, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.8413252830505371, "step": 47 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5424516211713444, "calib/avg_num_step_conf": 5.4609375, "calib/ece": 0.2054509803921569, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.027450980392156862, "calib/gap": 0.01006535947712428, "calib/mean_conf": 0.801921568627451, "calib/mu_c": 0.8059477124183007, "calib/mu_w": 0.7958823529411764, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.20368627450980395, "calib/std_conf": 0.07898767417799572, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1420.0, "completions/max_terminated_length": 1420.0, "completions/mean_length": 496.2109375, "completions/mean_terminated_length": 504.08734130859375, "completions/min_length": 0.0, "completions/min_terminated_length": 188.0, "epoch": 0.0512, "grad_norm": 0.018547268584370613, "learning_rate": 4.222222222222223e-06, "loss": -0.0186, "num_tokens": 11697274.0, "reward": 1.230703353881836, "reward_std": 0.3592337369918823, "rewards/accuracy_reward_step": 0.59765625, "rewards/final_brier_reward_step": 0.7150160074234009, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.8210784196853638, "step": 48 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5323356302038472, "calib/avg_num_step_conf": 5.78125, "calib/ece": 0.13845849802371538, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.05138339920948617, "calib/gap": 0.00918963537180606, "calib/mean_conf": 0.8111857707509882, "calib/mu_c": 0.8141279069767442, "calib/mu_w": 0.8049382716049381, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.13490118577075094, "calib/std_conf": 0.07396303608302854, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2479.0, "completions/max_terminated_length": 2479.0, "completions/mean_length": 579.01953125, "completions/mean_terminated_length": 583.5787353515625, "completions/min_length": 0.0, "completions/min_terminated_length": 252.0, "epoch": 0.05226666666666667, "grad_norm": 0.027712762355804443, "learning_rate": 4.194444444444445e-06, "loss": 0.0179, "num_tokens": 11950039.0, "reward": 1.3140902519226074, "reward_std": 0.35756993293762207, "rewards/accuracy_reward_step": 0.671875, "rewards/final_brier_reward_step": 0.7546730041503906, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.8259067535400391, "step": 49 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5292220744680851, "calib/avg_num_step_conf": 5.93359375, "calib/ece": 0.21779527559055106, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.1141732283464567, "calib/gap": 0.010610372340425722, "calib/mean_conf": 0.8433858267716535, "calib/mu_c": 0.8473124999999999, "calib/mu_w": 0.8367021276595742, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2156299212598424, "calib/std_conf": 0.07387711090490093, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2526.0, "completions/max_terminated_length": 2526.0, "completions/mean_length": 574.14453125, "completions/mean_terminated_length": 580.9525756835938, "completions/min_length": 0.0, "completions/min_terminated_length": 230.0, "epoch": 0.05333333333333334, "grad_norm": 0.018801305443048477, "learning_rate": 4.166666666666667e-06, "loss": -0.0004, "num_tokens": 12202380.0, "reward": 1.2519819736480713, "reward_std": 0.3390369415283203, "rewards/accuracy_reward_step": 0.625, "rewards/final_brier_reward_step": 0.7151703238487244, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.800570011138916, "step": 50 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.616778569471088, "calib/avg_num_step_conf": 6.58203125, "calib/ece": 0.23719367588932816, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.45454545454545453, "calib/gap": 0.03487393806522343, "calib/mean_conf": 0.8854150197628459, "calib/mu_c": 0.8976829268292683, "calib/mu_w": 0.8628089887640449, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.23719367588932816, "calib/std_conf": 0.06855601047921106, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2485.0, "completions/max_terminated_length": 2485.0, "completions/mean_length": 681.93359375, "completions/mean_terminated_length": 690.019775390625, "completions/min_length": 0.0, "completions/min_terminated_length": 228.0, "epoch": 0.0544, "grad_norm": 0.029241889715194702, "learning_rate": 4.138888888888889e-06, "loss": 0.0285, "num_tokens": 12486251.0, "reward": 1.2646057605743408, "reward_std": 0.3077329397201538, "rewards/accuracy_reward_step": 0.640625, "rewards/final_brier_reward_step": 0.7183948755264282, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.7892470955848694, "step": 51 }, { "calib/answer_extract_rate": 0.921875, "calib/auroc": 0.516793320198893, "calib/avg_num_step_conf": 8.91015625, "calib/ece": 0.18136065573770502, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.91796875, "calib/frac_conf_gt_0.9": 0.8073770491803278, "calib/gap": 0.04986621634299648, "calib/mean_conf": 0.9186557377049179, "calib/mu_c": 0.9303048128342246, "calib/mu_w": 0.8804385964912281, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.16681147540983618, "calib/std_conf": 0.1229022849560032, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2870.0, "completions/max_terminated_length": 2870.0, "completions/mean_length": 766.2734375, "completions/mean_terminated_length": 781.537841796875, "completions/min_length": 0.0, "completions/min_terminated_length": 340.0, "epoch": 0.055466666666666664, "grad_norm": 0.030566586181521416, "learning_rate": 4.111111111111111e-06, "loss": 0.0031, "num_tokens": 12790369.0, "reward": 1.344118356704712, "reward_std": 0.3666154146194458, "rewards/accuracy_reward_step": 0.73046875, "rewards/final_brier_reward_step": 0.7536975145339966, "rewards/format_reward_step": 0.91796875, "rewards/stepwise_brier_reward": 0.7829321026802063, "step": 52 }, { "calib/answer_extract_rate": 0.9453125, "calib/auroc": 0.5497840251848598, "calib/avg_num_step_conf": 10.84375, "calib/ece": 0.318393442622951, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.93359375, "calib/frac_conf_gt_0.9": 0.9057377049180327, "calib/gap": 2.9284720692857036e-07, "calib/mean_conf": 0.9538852459016394, "calib/mu_c": 0.9538853503184714, "calib/mu_w": 0.9538850574712645, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3144180327868854, "calib/std_conf": 0.055462244833658796, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2595.0, "completions/max_terminated_length": 2595.0, "completions/mean_length": 855.8359375, "completions/mean_terminated_length": 865.9842529296875, "completions/min_length": 0.0, "completions/min_terminated_length": 388.0, "epoch": 0.05653333333333333, "grad_norm": 0.019293086603283882, "learning_rate": 4.083333333333334e-06, "loss": 0.0408, "num_tokens": 13115287.0, "reward": 1.1891376972198486, "reward_std": 0.4293547570705414, "rewards/accuracy_reward_step": 0.6171875, "rewards/final_brier_reward_step": 0.6335127353668213, "rewards/format_reward_step": 0.93359375, "rewards/stepwise_brier_reward": 0.7206944823265076, "step": 53 }, { "calib/answer_extract_rate": 0.89453125, "calib/auroc": 0.5970141489804411, "calib/avg_num_step_conf": 12.82421875, "calib/ece": 0.19900689655172424, "calib/final_conf_rate": 0.90625, "calib/format_rate": 0.890625, "calib/frac_conf_gt_0.9": 0.9137931034482759, "calib/gap": 0.02739475655430701, "calib/mean_conf": 0.9543517241379311, "calib/mu_c": 0.9607280898876405, "calib/mu_w": 0.9333333333333335, "calib/nonempty_final_conf_rate": 0.90625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.1930586206896553, "calib/std_conf": 0.08936236284688913, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2895.0, "completions/max_terminated_length": 2895.0, "completions/mean_length": 941.28125, "completions/mean_terminated_length": 948.6929321289062, "completions/min_length": 0.0, "completions/min_terminated_length": 393.0, "epoch": 0.0576, "grad_norm": 0.01729551889002323, "learning_rate": 4.055555555555556e-06, "loss": 0.078, "num_tokens": 13462487.0, "reward": 1.2738007307052612, "reward_std": 0.45870453119277954, "rewards/accuracy_reward_step": 0.6953125, "rewards/final_brier_reward_step": 0.7087910175323486, "rewards/format_reward_step": 0.890625, "rewards/stepwise_brier_reward": 0.7145369052886963, "step": 54 }, { "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.6096219931271477, "calib/avg_num_step_conf": 9.41796875, "calib/ece": 0.3381672064777327, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.9109311740890689, "calib/gap": 0.07674392439862543, "calib/mean_conf": 0.9434303643724696, "calib/mu_c": 0.9735686666666667, "calib/mu_w": 0.8968247422680413, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3371550607287448, "calib/std_conf": 0.1483297554958353, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2510.0, "completions/max_terminated_length": 2510.0, "completions/mean_length": 794.65234375, "completions/mean_terminated_length": 807.2659301757812, "completions/min_length": 0.0, "completions/min_terminated_length": 345.0, "epoch": 0.058666666666666666, "grad_norm": 0.015972711145877838, "learning_rate": 4.027777777777779e-06, "loss": 0.0101, "num_tokens": 13773742.0, "reward": 1.161836862564087, "reward_std": 0.41535335779190063, "rewards/accuracy_reward_step": 0.5859375, "rewards/final_brier_reward_step": 0.6316208839416504, "rewards/format_reward_step": 0.953125, "rewards/stepwise_brier_reward": 0.7188516855239868, "step": 55 }, { "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.6191996871741399, "calib/avg_num_step_conf": 7.30078125, "calib/ece": 0.41285140562249006, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.9558232931726908, "calib/gap": 0.016744655891553606, "calib/mean_conf": 0.9630522088353414, "calib/mu_c": 0.9705839416058394, "calib/mu_w": 0.9538392857142858, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.41285140562249006, "calib/std_conf": 0.04857323048779692, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2893.0, "completions/max_terminated_length": 2893.0, "completions/mean_length": 794.8359375, "completions/mean_terminated_length": 797.9530029296875, "completions/min_length": 0.0, "completions/min_terminated_length": 321.0, "epoch": 0.05973333333333333, "grad_norm": 0.013218984007835388, "learning_rate": 4.000000000000001e-06, "loss": 0.0225, "num_tokens": 14084060.0, "reward": 1.108764886856079, "reward_std": 0.3533090353012085, "rewards/accuracy_reward_step": 0.53515625, "rewards/final_brier_reward_step": 0.5719242095947266, "rewards/format_reward_step": 0.97265625, "rewards/stepwise_brier_reward": 0.7498536705970764, "step": 56 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.6389833711262283, "calib/avg_num_step_conf": 6.890625, "calib/ece": 0.17944799999999997, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.956, "calib/gap": 0.030040438397581304, "calib/mean_conf": 0.957848, "calib/mu_c": 0.9643367346938775, "calib/mu_w": 0.9342962962962962, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.17664799999999997, "calib/std_conf": 0.07826023828228483, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2547.0, "completions/max_terminated_length": 2547.0, "completions/mean_length": 707.57421875, "completions/mean_terminated_length": 713.1456909179688, "completions/min_length": 0.0, "completions/min_terminated_length": 275.0, "epoch": 0.0608, "grad_norm": 0.0429513193666935, "learning_rate": 3.972222222222223e-06, "loss": -0.0167, "num_tokens": 14371991.0, "reward": 1.4135373830795288, "reward_std": 0.30638158321380615, "rewards/accuracy_reward_step": 0.765625, "rewards/final_brier_reward_step": 0.7856274843215942, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.8294596076011658, "step": 57 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.507473173224323, "calib/avg_num_step_conf": 6.515625, "calib/ece": 0.3708627450980391, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.9607843137254902, "calib/gap": -7.281553398075236e-05, "calib/mean_conf": 0.957529411764706, "calib/mu_c": 0.9574999999999999, "calib/mu_w": 0.9575728155339807, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.36615686274509796, "calib/std_conf": 0.0651978445876063, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2570.0, "completions/max_terminated_length": 2570.0, "completions/mean_length": 725.23828125, "completions/mean_terminated_length": 733.8379516601562, "completions/min_length": 0.0, "completions/min_terminated_length": 351.0, "epoch": 0.06186666666666667, "grad_norm": 0.023280173540115356, "learning_rate": 3.944444444444445e-06, "loss": 0.0159, "num_tokens": 14663972.0, "reward": 1.1888699531555176, "reward_std": 0.3381074070930481, "rewards/accuracy_reward_step": 0.59375, "rewards/final_brier_reward_step": 0.6218597888946533, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.762526273727417, "step": 58 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6436332179930797, "calib/avg_num_step_conf": 6.35546875, "calib/ece": 0.2987529411764707, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.9568627450980393, "calib/gap": 0.013129411764705945, "calib/mean_conf": 0.9578117647058825, "calib/mu_c": 0.9621882352941177, "calib/mu_w": 0.9490588235294117, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.29494901960784325, "calib/std_conf": 0.05208977788041442, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2303.0, "completions/max_terminated_length": 2303.0, "completions/mean_length": 654.76953125, "completions/mean_terminated_length": 662.5336303710938, "completions/min_length": 0.0, "completions/min_terminated_length": 271.0, "epoch": 0.06293333333333333, "grad_norm": 0.029063470661640167, "learning_rate": 3.916666666666667e-06, "loss": 0.0083, "num_tokens": 14937841.0, "reward": 1.2826330661773682, "reward_std": 0.33519983291625977, "rewards/accuracy_reward_step": 0.6640625, "rewards/final_brier_reward_step": 0.6934150457382202, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.7847735285758972, "step": 59 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5799521128583446, "calib/avg_num_step_conf": 5.9921875, "calib/ece": 0.3572047244094489, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.9645669291338582, "calib/gap": 0.0144030285381479, "calib/mean_conf": 0.9595669291338583, "calib/mu_c": 0.9652941176470589, "calib/mu_w": 0.950891089108911, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3572047244094489, "calib/std_conf": 0.04124310308387457, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2515.0, "completions/max_terminated_length": 2515.0, "completions/mean_length": 645.96484375, "completions/mean_terminated_length": 651.0512084960938, "completions/min_length": 0.0, "completions/min_terminated_length": 317.0, "epoch": 0.064, "grad_norm": 0.03808138146996498, "learning_rate": 3.88888888888889e-06, "loss": 0.0358, "num_tokens": 15212064.0, "reward": 1.1904234886169434, "reward_std": 0.35033729672431946, "rewards/accuracy_reward_step": 0.59765625, "rewards/final_brier_reward_step": 0.63309645652771, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.7457853555679321, "step": 60 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.530169268933314, "calib/avg_num_step_conf": 5.88671875, "calib/ece": 0.27327843137254904, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.9686274509803922, "calib/gap": 0.004854954034729242, "calib/mean_conf": 0.9622980392156864, "calib/mu_c": 0.9637640449438203, "calib/mu_w": 0.958909090909091, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2687686274509804, "calib/std_conf": 0.05155044546391306, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2463.0, "completions/max_terminated_length": 2463.0, "completions/mean_length": 563.7890625, "completions/mean_terminated_length": 570.4743041992188, "completions/min_length": 0.0, "completions/min_terminated_length": 275.0, "epoch": 0.06506666666666666, "grad_norm": 0.030153486877679825, "learning_rate": 3.861111111111112e-06, "loss": 0.016, "num_tokens": 15460458.0, "reward": 1.320373773574829, "reward_std": 0.22351746261119843, "rewards/accuracy_reward_step": 0.6953125, "rewards/final_brier_reward_step": 0.7159682512283325, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.7881830930709839, "step": 61 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5430094401041666, "calib/avg_num_step_conf": 6.08203125, "calib/ece": 0.214609375, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.99609375, "calib/gap": 0.0028125000000001066, "calib/mean_conf": 0.964609375, "calib/mu_c": 0.9653125, "calib/mu_w": 0.9624999999999999, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.214609375, "calib/std_conf": 0.017428530119013918, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1563.0, "completions/max_terminated_length": 1563.0, "completions/mean_length": 645.98828125, "completions/mean_terminated_length": 656.2421264648438, "completions/min_length": 0.0, "completions/min_terminated_length": 315.0, "epoch": 0.06613333333333334, "grad_norm": 0.02634434588253498, "learning_rate": 3.833333333333334e-06, "loss": -0.0006, "num_tokens": 15732911.0, "reward": 1.4006956815719604, "reward_std": 0.2972757816314697, "rewards/accuracy_reward_step": 0.75, "rewards/final_brier_reward_step": 0.7671937942504883, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.8355890512466431, "step": 62 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6425619834710744, "calib/avg_num_step_conf": 6.15234375, "calib/ece": 0.3030434782608697, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.9525691699604744, "calib/gap": 0.016886363636363755, "calib/mean_conf": 0.9552173913043478, "calib/mu_c": 0.9610909090909092, "calib/mu_w": 0.9442045454545455, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3030434782608697, "calib/std_conf": 0.031366319861488454, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2545.0, "completions/max_terminated_length": 2545.0, "completions/mean_length": 695.38671875, "completions/mean_terminated_length": 700.8621826171875, "completions/min_length": 0.0, "completions/min_terminated_length": 299.0, "epoch": 0.0672, "grad_norm": 0.01735755056142807, "learning_rate": 3.8055555555555556e-06, "loss": 0.0179, "num_tokens": 16019570.0, "reward": 1.257283091545105, "reward_std": 0.3743683099746704, "rewards/accuracy_reward_step": 0.64453125, "rewards/final_brier_reward_step": 0.6799362897872925, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.7827895283699036, "step": 63 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5614452912357105, "calib/avg_num_step_conf": 6.40234375, "calib/ece": 0.2993019607843138, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.9450980392156862, "calib/gap": 0.027714071856287492, "calib/mean_conf": 0.9444, "calib/mu_c": 0.9539640718562875, "calib/mu_w": 0.92625, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2944000000000001, "calib/std_conf": 0.09938373244404643, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2521.0, "completions/max_terminated_length": 2521.0, "completions/mean_length": 669.65625, "completions/mean_terminated_length": 677.5968627929688, "completions/min_length": 0.0, "completions/min_terminated_length": 301.0, "epoch": 0.06826666666666667, "grad_norm": 0.01593654230237007, "learning_rate": 3.777777777777778e-06, "loss": 0.0033, "num_tokens": 16294778.0, "reward": 1.2759943008422852, "reward_std": 0.32874953746795654, "rewards/accuracy_reward_step": 0.65234375, "rewards/final_brier_reward_step": 0.6901290416717529, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.8083797693252563, "step": 64 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5773874161609709, "calib/avg_num_step_conf": 6.44921875, "calib/ece": 0.35730078125, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.9765625, "calib/gap": 0.008678696901948113, "calib/mean_conf": 0.96276953125, "calib/mu_c": 0.9661935483870967, "calib/mu_w": 0.9575148514851486, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.35730078125, "calib/std_conf": 0.03492499789413413, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1453.0, "completions/max_terminated_length": 1453.0, "completions/mean_length": 620.375, "completions/mean_terminated_length": 630.2222290039062, "completions/min_length": 0.0, "completions/min_terminated_length": 328.0, "epoch": 0.06933333333333333, "grad_norm": 0.019266171380877495, "learning_rate": 3.7500000000000005e-06, "loss": -0.0102, "num_tokens": 16558618.0, "reward": 1.1949491500854492, "reward_std": 0.3011029064655304, "rewards/accuracy_reward_step": 0.60546875, "rewards/final_brier_reward_step": 0.6324816346168518, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.7293462753295898, "step": 65 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.6605113636363636, "calib/avg_num_step_conf": 6.34375, "calib/ece": 0.4190277777777777, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.9325396825396826, "calib/gap": 0.043916666666666715, "calib/mean_conf": 0.9428373015873016, "calib/mu_c": 0.96375, "calib/mu_w": 0.9198333333333333, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.4190277777777777, "calib/std_conf": 0.11225319186901574, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2509.0, "completions/max_terminated_length": 2509.0, "completions/mean_length": 740.83984375, "completions/mean_terminated_length": 746.6732177734375, "completions/min_length": 0.0, "completions/min_terminated_length": 343.0, "epoch": 0.0704, "grad_norm": 0.018004480749368668, "learning_rate": 3.7222222222222225e-06, "loss": 0.0023, "num_tokens": 16854625.0, "reward": 1.08465576171875, "reward_std": 0.3919914960861206, "rewards/accuracy_reward_step": 0.515625, "rewards/final_brier_reward_step": 0.5751608610153198, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.7165870666503906, "step": 66 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.6927555958862673, "calib/avg_num_step_conf": 6.68359375, "calib/ece": 0.2721600000000002, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.976, "calib/gap": 0.020042347247428705, "calib/mean_conf": 0.9681600000000001, "calib/mu_c": 0.9742528735632183, "calib/mu_w": 0.9542105263157896, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2721600000000002, "calib/std_conf": 0.0520750842534124, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 3058.0, "completions/max_terminated_length": 3058.0, "completions/mean_length": 731.2421875, "completions/mean_terminated_length": 737.0, "completions/min_length": 0.0, "completions/min_terminated_length": 414.0, "epoch": 0.07146666666666666, "grad_norm": 0.025369413197040558, "learning_rate": 3.694444444444445e-06, "loss": 0.0397, "num_tokens": 17146831.0, "reward": 1.3014209270477295, "reward_std": 0.3092834949493408, "rewards/accuracy_reward_step": 0.68359375, "rewards/final_brier_reward_step": 0.7032366991043091, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.7915095090866089, "step": 67 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6287106350025813, "calib/avg_num_step_conf": 7.73046875, "calib/ece": 0.38471146245059307, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.9762845849802372, "calib/gap": 0.02899122354155914, "calib/mean_conf": 0.9724584980237155, "calib/mu_c": 0.9843758389261746, "calib/mu_w": 0.9553846153846155, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.384118577075099, "calib/std_conf": 0.08026031792434658, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2427.0, "completions/max_terminated_length": 2427.0, "completions/mean_length": 745.9296875, "completions/mean_terminated_length": 754.7747192382812, "completions/min_length": 0.0, "completions/min_terminated_length": 376.0, "epoch": 0.07253333333333334, "grad_norm": 0.025018859654664993, "learning_rate": 3.6666666666666666e-06, "loss": 0.0161, "num_tokens": 17441877.0, "reward": 1.1590189933776855, "reward_std": 0.2395622730255127, "rewards/accuracy_reward_step": 0.58203125, "rewards/final_brier_reward_step": 0.6111654043197632, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.7085044384002686, "step": 68 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5624626221011363, "calib/avg_num_step_conf": 8.484375, "calib/ece": 0.392394, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0028788291580839065, "calib/mean_conf": 0.988394, "calib/mu_c": 0.9895570469798659, "calib/mu_w": 0.986678217821782, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.392394, "calib/std_conf": 0.011460617958906065, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2654.0, "completions/max_terminated_length": 2654.0, "completions/mean_length": 832.2421875, "completions/mean_terminated_length": 842.1107177734375, "completions/min_length": 0.0, "completions/min_terminated_length": 571.0, "epoch": 0.0736, "grad_norm": 0.02396257221698761, "learning_rate": 3.638888888888889e-06, "loss": -0.0051, "num_tokens": 17759427.0, "reward": 1.1528986692428589, "reward_std": 0.37283453345298767, "rewards/accuracy_reward_step": 0.58203125, "rewards/final_brier_reward_step": 0.5922831296920776, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.7146240472793579, "step": 69 }, { "calib/answer_extract_rate": 0.95703125, "calib/auroc": 0.6067518248175183, "calib/avg_num_step_conf": 11.0, "calib/ece": 0.4247936714285716, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.9918367346938776, "calib/gap": 0.018091075020275627, "calib/mean_conf": 0.9839773448979593, "calib/mu_c": 0.9919521861313868, "calib/mu_w": 0.9738611111111112, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.4247936714285716, "calib/std_conf": 0.07564979492676538, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3007.0, "completions/max_terminated_length": 3007.0, "completions/mean_length": 1093.33203125, "completions/mean_terminated_length": 1093.33203125, "completions/min_length": 626.0, "completions/min_terminated_length": 626.0, "epoch": 0.07466666666666667, "grad_norm": 0.013647088780999184, "learning_rate": 3.6111111111111115e-06, "loss": 0.0378, "num_tokens": 18146312.0, "reward": 1.0648112297058105, "reward_std": 0.3514804542064667, "rewards/accuracy_reward_step": 0.53515625, "rewards/final_brier_reward_step": 0.5514882802963257, "rewards/format_reward_step": 0.95703125, "rewards/stepwise_brier_reward": 0.610100507736206, "step": 70 }, { "calib/answer_extract_rate": 0.94921875, "calib/auroc": 0.5648386608089682, "calib/avg_num_step_conf": 12.8984375, "calib/ece": 0.3250951378539096, "calib/final_conf_rate": 0.94921875, "calib/format_rate": 0.9453125, "calib/frac_conf_gt_0.9": 0.9794238683127572, "calib/gap": 0.002710636030677338, "calib/mean_conf": 0.983860569952675, "calib/mu_c": 0.9847752701770188, "calib/mu_w": 0.9820646341463415, "calib/nonempty_final_conf_rate": 0.94921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.3232021337386833, "calib/std_conf": 0.05523998934890417, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2509.0, "completions/max_terminated_length": 2509.0, "completions/mean_length": 1163.98046875, "completions/mean_terminated_length": 1177.78271484375, "completions/min_length": 0.0, "completions/min_terminated_length": 655.0, "epoch": 0.07573333333333333, "grad_norm": 0.013985008001327515, "learning_rate": 3.5833333333333335e-06, "loss": 0.0229, "num_tokens": 18548699.0, "reward": 1.1860026121139526, "reward_std": 0.4622534215450287, "rewards/accuracy_reward_step": 0.62890625, "rewards/final_brier_reward_step": 0.6370214819908142, "rewards/format_reward_step": 0.9453125, "rewards/stepwise_brier_reward": 0.6460515260696411, "step": 71 }, { "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5552371541501977, "calib/avg_num_step_conf": 12.12109375, "calib/ece": 0.4335063260885079, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.9919354838709677, "calib/gap": 0.007931317065513643, "calib/mean_conf": 0.989957938991734, "calib/mu_c": 0.9934758618836956, "calib/mu_w": 0.985544544818182, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.4335063260885079, "calib/std_conf": 0.03144958680661941, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2512.0, "completions/max_terminated_length": 2512.0, "completions/mean_length": 1088.58203125, "completions/mean_terminated_length": 1088.58203125, "completions/min_length": 585.0, "completions/min_terminated_length": 585.0, "epoch": 0.0768, "grad_norm": 0.034367017447948456, "learning_rate": 3.555555555555556e-06, "loss": 0.0321, "num_tokens": 18931784.0, "reward": 1.0714225769042969, "reward_std": 0.3781731128692627, "rewards/accuracy_reward_step": 0.5390625, "rewards/final_brier_reward_step": 0.5464457273483276, "rewards/format_reward_step": 0.953125, "rewards/stepwise_brier_reward": 0.6298699378967285, "step": 72 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.595415825063669, "calib/avg_num_step_conf": 9.12890625, "calib/ece": 0.2215952380952382, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.996031746031746, "calib/gap": 0.010881057346096679, "calib/mean_conf": 0.987468253968254, "calib/mu_c": 0.9900158031088084, "calib/mu_w": 0.9791347457627118, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.2215952380952382, "calib/std_conf": 0.027003186248779867, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2198.0, "completions/max_terminated_length": 2198.0, "completions/mean_length": 903.296875, "completions/mean_terminated_length": 917.6349487304688, "completions/min_length": 0.0, "completions/min_terminated_length": 524.0, "epoch": 0.07786666666666667, "grad_norm": 0.015287628397345543, "learning_rate": 3.5277777777777784e-06, "loss": -0.027, "num_tokens": 19270060.0, "reward": 1.386577844619751, "reward_std": 0.38972994685173035, "rewards/accuracy_reward_step": 0.75390625, "rewards/final_brier_reward_step": 0.7548409700393677, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.7992828488349915, "step": 73 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5751596424010218, "calib/avg_num_step_conf": 7.390625, "calib/ece": 0.44533705179282884, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9453125, "calib/frac_conf_gt_0.9": 0.9880478087649402, "calib/gap": 0.010220095785440142, "calib/mean_conf": 0.983185657370518, "calib/mu_c": 0.9879088888888887, "calib/mu_w": 0.9776887931034486, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.953125, "calib/pce": 0.44533705179282884, "calib/std_conf": 0.033894895452195664, "calib/step_conf_rate": 0.953125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2312.0, "completions/max_terminated_length": 2312.0, "completions/mean_length": 785.55078125, "completions/mean_terminated_length": 791.7362060546875, "completions/min_length": 0.0, "completions/min_terminated_length": 130.0, "epoch": 0.07893333333333333, "grad_norm": 0.020915111526846886, "learning_rate": 3.5e-06, "loss": -0.0134, "num_tokens": 19575089.0, "reward": 1.0587660074234009, "reward_std": 0.41094404458999634, "rewards/accuracy_reward_step": 0.52734375, "rewards/final_brier_reward_step": 0.5324146747589111, "rewards/format_reward_step": 0.9453125, "rewards/stepwise_brier_reward": 0.6479615569114685, "step": 74 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.6387598369204514, "calib/avg_num_step_conf": 6.5859375, "calib/ece": 0.19560912698412702, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9453125, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.008875272589361938, "calib/mean_conf": 0.9852916666666667, "calib/mu_c": 0.9871582914572865, "calib/mu_w": 0.9782830188679246, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.94921875, "calib/pce": 0.19560912698412702, "calib/std_conf": 0.012441898699714949, "calib/step_conf_rate": 0.94921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2513.0, "completions/max_terminated_length": 2513.0, "completions/mean_length": 724.3515625, "completions/mean_terminated_length": 730.0551147460938, "completions/min_length": 0.0, "completions/min_terminated_length": 413.0, "epoch": 0.08, "grad_norm": 0.025613196194171906, "learning_rate": 3.4722222222222224e-06, "loss": 0.0117, "num_tokens": 19865275.0, "reward": 1.4001026153564453, "reward_std": 0.2639533281326294, "rewards/accuracy_reward_step": 0.77734375, "rewards/final_brier_reward_step": 0.757716715335846, "rewards/format_reward_step": 0.9453125, "rewards/stepwise_brier_reward": 0.7880062460899353, "step": 75 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.552127162225339, "calib/avg_num_step_conf": 5.91015625, "calib/ece": 0.34790393700787403, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9453125, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.00467329192546595, "calib/mean_conf": 0.9817622047244094, "calib/mu_c": 0.9834732919254658, "calib/mu_w": 0.9787999999999999, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.94921875, "calib/pce": 0.34790393700787403, "calib/std_conf": 0.016721097659849197, "calib/step_conf_rate": 0.94921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2279.0, "completions/max_terminated_length": 2279.0, "completions/mean_length": 692.28515625, "completions/mean_terminated_length": 700.4940795898438, "completions/min_length": 0.0, "completions/min_terminated_length": 261.0, "epoch": 0.08106666666666666, "grad_norm": 0.02115590125322342, "learning_rate": 3.444444444444445e-06, "loss": 0.0151, "num_tokens": 20145556.0, "reward": 1.1858232021331787, "reward_std": 0.3526293933391571, "rewards/accuracy_reward_step": 0.62890625, "rewards/final_brier_reward_step": 0.615464448928833, "rewards/format_reward_step": 0.9453125, "rewards/stepwise_brier_reward": 0.6668908596038818, "step": 76 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5655759162303665, "calib/avg_num_step_conf": 5.18359375, "calib/ece": 0.218605577689243, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.92578125, "calib/frac_conf_gt_0.9": 0.9920318725099602, "calib/gap": 0.00495811518324607, "calib/mean_conf": 0.976772908366534, "calib/mu_c": 0.9779581151832462, "calib/mu_w": 0.9730000000000001, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.93359375, "calib/pce": 0.21721115537848604, "calib/std_conf": 0.029571040518692235, "calib/step_conf_rate": 0.93359375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2978.0, "completions/max_terminated_length": 2978.0, "completions/mean_length": 683.703125, "completions/mean_terminated_length": 689.0866088867188, "completions/min_length": 0.0, "completions/min_terminated_length": 135.0, "epoch": 0.08213333333333334, "grad_norm": 0.03950350359082222, "learning_rate": 3.416666666666667e-06, "loss": -0.0013, "num_tokens": 20425248.0, "reward": 1.3432984352111816, "reward_std": 0.4074465334415436, "rewards/accuracy_reward_step": 0.74609375, "rewards/final_brier_reward_step": 0.7158539295196533, "rewards/format_reward_step": 0.92578125, "rewards/stepwise_brier_reward": 0.7471832633018494, "step": 77 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5686519141112352, "calib/avg_num_step_conf": 4.91796875, "calib/ece": 0.3566535433070866, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.91015625, "calib/frac_conf_gt_0.9": 0.9881889763779528, "calib/gap": 0.003612187274279366, "calib/mean_conf": 0.9716141732283464, "calib/mu_c": 0.9729936305732485, "calib/mu_w": 0.9693814432989691, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9140625, "calib/pce": 0.35507874015748025, "calib/std_conf": 0.049985753441866766, "calib/step_conf_rate": 0.9140625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2291.0, "completions/max_terminated_length": 2291.0, "completions/mean_length": 714.265625, "completions/mean_terminated_length": 719.8897705078125, "completions/min_length": 0.0, "completions/min_terminated_length": 412.0, "epoch": 0.0832, "grad_norm": 0.022809116169810295, "learning_rate": 3.3888888888888893e-06, "loss": 0.0207, "num_tokens": 20716124.0, "reward": 1.1449217796325684, "reward_std": 0.4311444163322449, "rewards/accuracy_reward_step": 0.61328125, "rewards/final_brier_reward_step": 0.5842800140380859, "rewards/format_reward_step": 0.91015625, "rewards/stepwise_brier_reward": 0.6321260929107666, "step": 78 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5249189341604399, "calib/avg_num_step_conf": 5.3046875, "calib/ece": 0.297764705882353, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.9803921568627451, "calib/gap": 0.012097138023403176, "calib/mean_conf": 0.9734509803921569, "calib/mu_c": 0.9773410404624276, "calib/mu_w": 0.9652439024390245, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.96875, "calib/pce": 0.29639215686274517, "calib/std_conf": 0.06805312622675691, "calib/step_conf_rate": 0.96875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2265.0, "completions/max_terminated_length": 2265.0, "completions/mean_length": 654.4453125, "completions/mean_terminated_length": 662.20556640625, "completions/min_length": 0.0, "completions/min_terminated_length": 264.0, "epoch": 0.08426666666666667, "grad_norm": 0.015963945537805557, "learning_rate": 3.3611111111111117e-06, "loss": 0.0026, "num_tokens": 20990038.0, "reward": 1.2741587162017822, "reward_std": 0.253301739692688, "rewards/accuracy_reward_step": 0.67578125, "rewards/final_brier_reward_step": 0.6832543015480042, "rewards/format_reward_step": 0.96875, "rewards/stepwise_brier_reward": 0.7415051460266113, "step": 79 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.588724841562783, "calib/avg_num_step_conf": 5.6171875, "calib/ece": 0.30869921875, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.9921875, "calib/gap": 0.007699770178981713, "calib/mean_conf": 0.9844804687500001, "calib/mu_c": 0.9869768786127169, "calib/mu_w": 0.9792771084337352, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.30869921875, "calib/std_conf": 0.02404828332813537, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1400.0, "completions/max_terminated_length": 1400.0, "completions/mean_length": 623.68359375, "completions/mean_terminated_length": 633.5833740234375, "completions/min_length": 0.0, "completions/min_terminated_length": 131.0, "epoch": 0.08533333333333333, "grad_norm": 0.02692350186407566, "learning_rate": 3.3333333333333333e-06, "loss": -0.0046, "num_tokens": 21251861.0, "reward": 1.2752444744110107, "reward_std": 0.25149065256118774, "rewards/accuracy_reward_step": 0.67578125, "rewards/final_brier_reward_step": 0.6844933032989502, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.7172658443450928, "step": 80 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5823503061072122, "calib/avg_num_step_conf": 5.4765625, "calib/ece": 0.26160784313725494, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.9686274509803922, "calib/gap": 0.0556771688815888, "calib/mean_conf": 0.9714117647058823, "calib/mu_c": 0.9875690607734806, "calib/mu_w": 0.9318918918918918, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.26160784313725494, "calib/std_conf": 0.10817001441459848, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2343.0, "completions/max_terminated_length": 2343.0, "completions/mean_length": 671.90625, "completions/mean_terminated_length": 682.5714721679688, "completions/min_length": 0.0, "completions/min_terminated_length": 409.0, "epoch": 0.0864, "grad_norm": 0.01904909871518612, "learning_rate": 3.3055555555555558e-06, "loss": 0.0224, "num_tokens": 21530117.0, "reward": 1.3242136240005493, "reward_std": 0.24336427450180054, "rewards/accuracy_reward_step": 0.70703125, "rewards/final_brier_reward_step": 0.7291464805603027, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.7552080154418945, "step": 81 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.564634724857685, "calib/avg_num_step_conf": 5.8046875, "calib/ece": 0.2544881889763781, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.9921259842519685, "calib/gap": -0.0002956989247312469, "calib/mean_conf": 0.9822834645669294, "calib/mu_c": 0.9822043010752688, "calib/mu_w": 0.9825, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.25224409448818913, "calib/std_conf": 0.026714854058498144, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2501.0, "completions/max_terminated_length": 2501.0, "completions/mean_length": 612.71875, "completions/mean_terminated_length": 619.9841918945312, "completions/min_length": 0.0, "completions/min_terminated_length": 364.0, "epoch": 0.08746666666666666, "grad_norm": 0.0393887422978878, "learning_rate": 3.277777777777778e-06, "loss": -0.007, "num_tokens": 21792525.0, "reward": 1.3466877937316895, "reward_std": 0.2821952700614929, "rewards/accuracy_reward_step": 0.7265625, "rewards/final_brier_reward_step": 0.7274277210235596, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.7686984539031982, "step": 82 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6424838709677418, "calib/avg_num_step_conf": 5.6953125, "calib/ece": 0.36698039215686284, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.9764705882352941, "calib/gap": 0.03409354838709677, "calib/mean_conf": 0.9748235294117648, "calib/mu_c": 0.9881935483870968, "calib/mu_w": 0.9541000000000001, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.36698039215686284, "calib/std_conf": 0.08685275027700912, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2296.0, "completions/max_terminated_length": 2296.0, "completions/mean_length": 694.421875, "completions/mean_terminated_length": 702.6561279296875, "completions/min_length": 0.0, "completions/min_terminated_length": 334.0, "epoch": 0.08853333333333334, "grad_norm": 0.015886960551142693, "learning_rate": 3.2500000000000002e-06, "loss": -0.0051, "num_tokens": 22077561.0, "reward": 1.1784580945968628, "reward_std": 0.20395585894584656, "rewards/accuracy_reward_step": 0.60546875, "rewards/final_brier_reward_step": 0.6299129128456116, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.6737630367279053, "step": 83 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5731372286250336, "calib/avg_num_step_conf": 6.5859375, "calib/ece": 0.3405098039215686, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.9921568627450981, "calib/gap": 0.008575448941302688, "calib/mean_conf": 0.9836470588235295, "calib/mu_c": 0.9867073170731707, "calib/mu_w": 0.978131868131868, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.3405098039215686, "calib/std_conf": 0.03523079286447134, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1308.0, "completions/max_terminated_length": 1308.0, "completions/mean_length": 589.5, "completions/mean_terminated_length": 598.857177734375, "completions/min_length": 0.0, "completions/min_terminated_length": 356.0, "epoch": 0.0896, "grad_norm": 0.024586163461208344, "learning_rate": 3.2222222222222227e-06, "loss": 0.0113, "num_tokens": 22334393.0, "reward": 1.22707998752594, "reward_std": 0.33372047543525696, "rewards/accuracy_reward_step": 0.640625, "rewards/final_brier_reward_step": 0.6545144319534302, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.6991181969642639, "step": 84 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6451747667455786, "calib/avg_num_step_conf": 6.40234375, "calib/ece": 0.32438735177865624, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.9881422924901185, "calib/gap": 0.014869795293134369, "calib/mean_conf": 0.9844664031620555, "calib/mu_c": 0.9895209580838323, "calib/mu_w": 0.974651162790698, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.32438735177865624, "calib/std_conf": 0.03883953837029604, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2588.0, "completions/max_terminated_length": 2588.0, "completions/mean_length": 641.05078125, "completions/mean_terminated_length": 648.6522216796875, "completions/min_length": 0.0, "completions/min_terminated_length": 408.0, "epoch": 0.09066666666666667, "grad_norm": 0.020817572250962257, "learning_rate": 3.1944444444444443e-06, "loss": -0.0066, "num_tokens": 22606326.0, "reward": 1.2418606281280518, "reward_std": 0.3128683567047119, "rewards/accuracy_reward_step": 0.65234375, "rewards/final_brier_reward_step": 0.6568652391433716, "rewards/format_reward_step": 0.96875, "rewards/stepwise_brier_reward": 0.7324519157409668, "step": 85 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6396774193548387, "calib/avg_num_step_conf": 6.37890625, "calib/ece": 0.3853333333333334, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.9921568627450981, "calib/gap": 0.005719354838709445, "calib/mean_conf": 0.9931764705882353, "calib/mu_c": 0.9954193548387096, "calib/mu_w": 0.9897000000000001, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.3853333333333334, "calib/std_conf": 0.012131155615309717, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2494.0, "completions/max_terminated_length": 2494.0, "completions/mean_length": 657.18359375, "completions/mean_terminated_length": 664.976318359375, "completions/min_length": 0.0, "completions/min_terminated_length": 364.0, "epoch": 0.09173333333333333, "grad_norm": 0.029312070459127426, "learning_rate": 3.1666666666666667e-06, "loss": 0.0274, "num_tokens": 22880077.0, "reward": 1.1700332164764404, "reward_std": 0.41688308119773865, "rewards/accuracy_reward_step": 0.60546875, "rewards/final_brier_reward_step": 0.6090359091758728, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.6609408855438232, "step": 86 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5267592592592593, "calib/avg_num_step_conf": 7.04296875, "calib/ece": 0.21003937007874018, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.9960629921259843, "calib/gap": 0.004511111111111132, "calib/mean_conf": 0.9974409448818896, "calib/mu_c": 0.9984000000000001, "calib/mu_w": 0.9938888888888889, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.21003937007874018, "calib/std_conf": 0.016366450003631558, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2473.0, "completions/max_terminated_length": 2473.0, "completions/mean_length": 629.83203125, "completions/mean_terminated_length": 634.7913208007812, "completions/min_length": 0.0, "completions/min_terminated_length": 404.0, "epoch": 0.0928, "grad_norm": 0.022811273112893105, "learning_rate": 3.138888888888889e-06, "loss": 0.0046, "num_tokens": 23146810.0, "reward": 1.4314923286437988, "reward_std": 0.25700902938842773, "rewards/accuracy_reward_step": 0.78125, "rewards/final_brier_reward_step": 0.7796496152877808, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.8330384492874146, "step": 87 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6044117647058824, "calib/avg_num_step_conf": 6.4765625, "calib/ece": 0.3346875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0028454172366619845, "calib/mean_conf": 0.99875, "calib/mu_c": 0.9997058823529411, "calib/mu_w": 0.9968604651162791, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3346875, "calib/std_conf": 0.0046770717334674305, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1704.0, "completions/max_terminated_length": 1704.0, "completions/mean_length": 639.36328125, "completions/mean_terminated_length": 649.511962890625, "completions/min_length": 0.0, "completions/min_terminated_length": 368.0, "epoch": 0.09386666666666667, "grad_norm": 0.016953283920884132, "learning_rate": 3.1111111111111116e-06, "loss": -0.0045, "num_tokens": 23420335.0, "reward": 1.2677648067474365, "reward_std": 0.26132655143737793, "rewards/accuracy_reward_step": 0.6640625, "rewards/final_brier_reward_step": 0.6661484241485596, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.7486610412597656, "step": 88 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5436245793344135, "calib/avg_num_step_conf": 6.36328125, "calib/ece": 0.441921568627451, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.001472017948398352, "calib/mean_conf": 0.9987843137254903, "calib/mu_c": 0.99943661971831, "calib/mu_w": 0.9979646017699116, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.441921568627451, "calib/std_conf": 0.0045689219799028204, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2542.0, "completions/max_terminated_length": 2542.0, "completions/mean_length": 646.34375, "completions/mean_terminated_length": 654.0079345703125, "completions/min_length": 0.0, "completions/min_terminated_length": 409.0, "epoch": 0.09493333333333333, "grad_norm": 0.01847773790359497, "learning_rate": 3.0833333333333336e-06, "loss": -0.0234, "num_tokens": 23694687.0, "reward": 1.0982062816619873, "reward_std": 0.3280404806137085, "rewards/accuracy_reward_step": 0.5546875, "rewards/final_brier_reward_step": 0.5520206689834595, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.6415855884552002, "step": 89 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5344256120527306, "calib/avg_num_step_conf": 5.83203125, "calib/ece": 0.2972619047619047, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0008203389830508945, "calib/mean_conf": 0.999642857142857, "calib/mu_c": 0.9998870056497177, "calib/mu_w": 0.9990666666666668, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.2972619047619047, "calib/std_conf": 0.0020585257697369763, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2420.0, "completions/max_terminated_length": 2420.0, "completions/mean_length": 638.8515625, "completions/mean_terminated_length": 641.3568725585938, "completions/min_length": 0.0, "completions/min_terminated_length": 357.0, "epoch": 0.096, "grad_norm": 0.01750790700316429, "learning_rate": 3.055555555555556e-06, "loss": 0.0364, "num_tokens": 23961553.0, "reward": 1.3008742332458496, "reward_std": 0.2936910092830658, "rewards/accuracy_reward_step": 0.69140625, "rewards/final_brier_reward_step": 0.6919488310813904, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.7615479230880737, "step": 90 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5173963133640553, "calib/avg_num_step_conf": 4.625, "calib/ece": 0.27265625000000004, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0010875576036867285, "calib/mean_conf": 0.99921875, "calib/mu_c": 0.9995161290322582, "calib/mu_w": 0.9984285714285714, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.27265625000000004, "calib/std_conf": 0.004259066615762194, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1445.0, "completions/max_terminated_length": 1445.0, "completions/mean_length": 571.41015625, "completions/mean_terminated_length": 580.4801635742188, "completions/min_length": 0.0, "completions/min_terminated_length": 337.0, "epoch": 0.09706666666666666, "grad_norm": 0.019347215071320534, "learning_rate": 3.0277777777777776e-06, "loss": -0.0018, "num_tokens": 24215546.0, "reward": 1.3434937000274658, "reward_std": 0.3211195468902588, "rewards/accuracy_reward_step": 0.7265625, "rewards/final_brier_reward_step": 0.7231175899505615, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.7524197101593018, "step": 91 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5821989528795811, "calib/avg_num_step_conf": 3.9296875, "calib/ece": 0.25160156250000015, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.99609375, "calib/gap": 0.008870720902134588, "calib/mean_conf": 0.9976953125000001, "calib/mu_c": 0.9999476439790577, "calib/mu_w": 0.9910769230769231, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.25160156250000015, "calib/std_conf": 0.022512169831612052, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1001.0, "completions/max_terminated_length": 1001.0, "completions/mean_length": 518.39453125, "completions/mean_terminated_length": 526.623046875, "completions/min_length": 0.0, "completions/min_terminated_length": 259.0, "epoch": 0.09813333333333334, "grad_norm": 0.015678079798817635, "learning_rate": 3e-06, "loss": -0.0333, "num_tokens": 24454975.0, "reward": 1.3853814601898193, "reward_std": 0.281213641166687, "rewards/accuracy_reward_step": 0.74609375, "rewards/final_brier_reward_step": 0.7462066411972046, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8187564015388489, "step": 92 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5368400621118012, "calib/avg_num_step_conf": 3.125, "calib/ece": 0.2734251968503937, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.9960629921259843, "calib/gap": 0.006673913043478197, "calib/mean_conf": 0.9978346456692914, "calib/mu_c": 0.9996739130434783, "calib/mu_w": 0.9930000000000001, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.97265625, "calib/pce": 0.2734251968503937, "calib/std_conf": 0.022405449203448433, "calib/step_conf_rate": 0.97265625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2296.0, "completions/max_terminated_length": 2296.0, "completions/mean_length": 509.01171875, "completions/mean_terminated_length": 517.09130859375, "completions/min_length": 0.0, "completions/min_terminated_length": 250.0, "epoch": 0.0992, "grad_norm": 0.019869696348905563, "learning_rate": 2.9722222222222225e-06, "loss": 0.0028, "num_tokens": 24691058.0, "reward": 1.327016830444336, "reward_std": 0.2369421124458313, "rewards/accuracy_reward_step": 0.71875, "rewards/final_brier_reward_step": 0.7060710787773132, "rewards/format_reward_step": 0.97265625, "rewards/stepwise_brier_reward": 0.7543399930000305, "step": 93 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5343127252941577, "calib/avg_num_step_conf": 2.9921875, "calib/ece": 0.3394921875000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0006862545058832215, "calib/mean_conf": 0.9996484375000001, "calib/mu_c": 0.9998816568047338, "calib/mu_w": 0.9991954022988506, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.3394921875000001, "calib/std_conf": 0.001841746130332233, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1004.0, "completions/max_terminated_length": 1004.0, "completions/mean_length": 445.4140625, "completions/mean_terminated_length": 452.4841613769531, "completions/min_length": 0.0, "completions/min_terminated_length": 232.0, "epoch": 0.10026666666666667, "grad_norm": 0.01680610328912735, "learning_rate": 2.944444444444445e-06, "loss": -0.0127, "num_tokens": 24913764.0, "reward": 1.2525968551635742, "reward_std": 0.2342762053012848, "rewards/accuracy_reward_step": 0.66015625, "rewards/final_brier_reward_step": 0.6606996059417725, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.716875433921814, "step": 94 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5220824080927708, "calib/avg_num_step_conf": 2.2734375, "calib/ece": 0.24355468750000003, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.99609375, "calib/gap": 0.007580393124434481, "calib/mean_conf": 0.9974609375, "calib/mu_c": 0.9993264248704663, "calib/mu_w": 0.9917460317460318, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.94140625, "calib/pce": 0.24355468750000003, "calib/std_conf": 0.025560101459522696, "calib/step_conf_rate": 0.94140625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2179.0, "completions/max_terminated_length": 2179.0, "completions/mean_length": 507.3203125, "completions/mean_terminated_length": 515.373046875, "completions/min_length": 0.0, "completions/min_terminated_length": 157.0, "epoch": 0.10133333333333333, "grad_norm": 0.019833406433463097, "learning_rate": 2.916666666666667e-06, "loss": 0.0243, "num_tokens": 25149766.0, "reward": 1.3548407554626465, "reward_std": 0.29544198513031006, "rewards/accuracy_reward_step": 0.75390625, "rewards/final_brier_reward_step": 0.721403956413269, "rewards/format_reward_step": 0.94140625, "rewards/stepwise_brier_reward": 0.7409278154373169, "step": 95 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6207537603515295, "calib/avg_num_step_conf": 2.296875, "calib/ece": 0.23090196078431374, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.984313725490196, "calib/gap": 0.03453861754267362, "calib/mean_conf": 0.991686274509804, "calib/mu_c": 0.9999484536082475, "calib/mu_w": 0.9654098360655738, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.97265625, "calib/pce": 0.23090196078431374, "calib/std_conf": 0.0570715032067457, "calib/step_conf_rate": 0.97265625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2300.0, "completions/max_terminated_length": 2300.0, "completions/mean_length": 473.95703125, "completions/mean_terminated_length": 479.57708740234375, "completions/min_length": 0.0, "completions/min_terminated_length": 197.0, "epoch": 0.1024, "grad_norm": 0.016077203676104546, "learning_rate": 2.888888888888889e-06, "loss": 0.0054, "num_tokens": 25376915.0, "reward": 1.3902102708816528, "reward_std": 0.1878473460674286, "rewards/accuracy_reward_step": 0.7578125, "rewards/final_brier_reward_step": 0.7653882503509521, "rewards/format_reward_step": 0.97265625, "rewards/stepwise_brier_reward": 0.7915465831756592, "step": 96 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5138356747552151, "calib/avg_num_step_conf": 2.3046875, "calib/ece": 0.31705882352941184, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0007662835249042654, "calib/mean_conf": 0.9994117647058824, "calib/mu_c": 0.9996551724137931, "calib/mu_w": 0.9988888888888888, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.31705882352941184, "calib/std_conf": 0.003657795707093656, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2293.0, "completions/max_terminated_length": 2293.0, "completions/mean_length": 443.546875, "completions/mean_terminated_length": 448.80633544921875, "completions/min_length": 0.0, "completions/min_terminated_length": 211.0, "epoch": 0.10346666666666667, "grad_norm": 0.02357901819050312, "learning_rate": 2.861111111111111e-06, "loss": -0.0144, "num_tokens": 25595535.0, "reward": 1.2720140218734741, "reward_std": 0.25493234395980835, "rewards/accuracy_reward_step": 0.6796875, "rewards/final_brier_reward_step": 0.6721839904785156, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.7127468585968018, "step": 97 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5371002692231927, "calib/avg_num_step_conf": 1.73046875, "calib/ece": 0.37984251968503935, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.91796875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.002525444874909577, "calib/mean_conf": 0.9979527559055118, "calib/mu_c": 0.9989171974522292, "calib/mu_w": 0.9963917525773196, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.921875, "calib/pce": 0.37984251968503935, "calib/std_conf": 0.009584976981900768, "calib/step_conf_rate": 0.921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2550.0, "completions/max_terminated_length": 2550.0, "completions/mean_length": 484.1484375, "completions/mean_terminated_length": 489.88934326171875, "completions/min_length": 0.0, "completions/min_terminated_length": 166.0, "epoch": 0.10453333333333334, "grad_norm": 0.02231212519109249, "learning_rate": 2.8333333333333335e-06, "loss": 0.0096, "num_tokens": 25825661.0, "reward": 1.149230718612671, "reward_std": 0.3662552237510681, "rewards/accuracy_reward_step": 0.61328125, "rewards/final_brier_reward_step": 0.5902304649353027, "rewards/format_reward_step": 0.91796875, "rewards/stepwise_brier_reward": 0.6355984210968018, "step": 98 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5839897474368593, "calib/avg_num_step_conf": 1.359375, "calib/ece": 0.4741106719367589, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.93359375, "calib/frac_conf_gt_0.9": 0.9723320158102767, "calib/gap": 0.022696924231057736, "calib/mean_conf": 0.9839920948616602, "calib/mu_c": 0.9951162790697675, "calib/mu_w": 0.9724193548387098, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9375, "calib/pce": 0.4741106719367589, "calib/std_conf": 0.062398312033446965, "calib/step_conf_rate": 0.9375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1499.0, "completions/max_terminated_length": 1499.0, "completions/mean_length": 457.83203125, "completions/mean_terminated_length": 465.0992431640625, "completions/min_length": 0.0, "completions/min_terminated_length": 119.0, "epoch": 0.1056, "grad_norm": 0.023881809785962105, "learning_rate": 2.805555555555556e-06, "loss": 0.0006, "num_tokens": 26048666.0, "reward": 0.9941545724868774, "reward_std": 0.3618253469467163, "rewards/accuracy_reward_step": 0.50390625, "rewards/final_brier_reward_step": 0.5019761323928833, "rewards/format_reward_step": 0.93359375, "rewards/stepwise_brier_reward": 0.5254234075546265, "step": 99 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5740484200804802, "calib/avg_num_step_conf": 1.04296875, "calib/ece": 0.3459765625, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.984375, "calib/gap": 0.02095190975658001, "calib/mean_conf": 0.9826953125000001, "calib/mu_c": 0.9903067484662575, "calib/mu_w": 0.9693548387096775, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9453125, "calib/pce": 0.3459765625, "calib/std_conf": 0.0536641841969795, "calib/step_conf_rate": 0.9453125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1530.0, "completions/max_terminated_length": 1530.0, "completions/mean_length": 385.95703125, "completions/mean_terminated_length": 392.0833435058594, "completions/min_length": 0.0, "completions/min_terminated_length": 135.0, "epoch": 0.10666666666666667, "grad_norm": 0.03753165155649185, "learning_rate": 2.7777777777777783e-06, "loss": -0.0029, "num_tokens": 26254879.0, "reward": 1.198153018951416, "reward_std": 0.20480790734291077, "rewards/accuracy_reward_step": 0.63671875, "rewards/final_brier_reward_step": 0.620968759059906, "rewards/format_reward_step": 0.94140625, "rewards/stepwise_brier_reward": 0.6833622455596924, "step": 100 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5337664960948021, "calib/avg_num_step_conf": 0.9609375, "calib/ece": 0.3501984126984125, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.90625, "calib/frac_conf_gt_0.9": 0.9761904761904762, "calib/gap": 0.017733638567196564, "calib/mean_conf": 0.9771825396825397, "calib/mu_c": 0.9837974683544304, "calib/mu_w": 0.9660638297872338, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.9140625, "calib/pce": 0.3501984126984125, "calib/std_conf": 0.057600829578689124, "calib/step_conf_rate": 0.9140625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2512.0, "completions/max_terminated_length": 2512.0, "completions/mean_length": 424.1796875, "completions/mean_terminated_length": 429.2095031738281, "completions/min_length": 0.0, "completions/min_terminated_length": 130.0, "epoch": 0.10773333333333333, "grad_norm": 0.020313022658228874, "learning_rate": 2.7500000000000004e-06, "loss": -0.0113, "num_tokens": 26470461.0, "reward": 1.1464192867279053, "reward_std": 0.3190857470035553, "rewards/accuracy_reward_step": 0.6171875, "rewards/final_brier_reward_step": 0.5873413681983948, "rewards/format_reward_step": 0.90625, "rewards/stepwise_brier_reward": 0.6233353614807129, "step": 101 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5090357142857143, "calib/avg_num_step_conf": 1.01953125, "calib/ece": 0.29764705882352943, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.9686274509803922, "calib/gap": 0.003717857142856995, "calib/mean_conf": 0.9811764705882353, "calib/mu_c": 0.9823428571428571, "calib/mu_w": 0.9786250000000001, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.29627450980392156, "calib/std_conf": 0.03394615452670737, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2410.0, "completions/max_terminated_length": 2410.0, "completions/mean_length": 330.8828125, "completions/mean_terminated_length": 334.80633544921875, "completions/min_length": 0.0, "completions/min_terminated_length": 93.0, "epoch": 0.1088, "grad_norm": 0.03096521832048893, "learning_rate": 2.7222222222222224e-06, "loss": -0.0047, "num_tokens": 26661863.0, "reward": 1.2824361324310303, "reward_std": 0.19478172063827515, "rewards/accuracy_reward_step": 0.68359375, "rewards/final_brier_reward_step": 0.6837546825408936, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.7272400259971619, "step": 102 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5814902918863315, "calib/avg_num_step_conf": 0.9921875, "calib/ece": 0.38301960784313727, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.9764705882352941, "calib/gap": 0.005951523723800922, "calib/mean_conf": 0.9849803921568628, "calib/mu_c": 0.9873376623376622, "calib/mu_w": 0.9813861386138613, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.95703125, "calib/pce": 0.3820392156862745, "calib/std_conf": 0.02167269013956109, "calib/step_conf_rate": 0.95703125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 951.0, "completions/max_terminated_length": 951.0, "completions/mean_length": 330.91015625, "completions/mean_terminated_length": 336.1627197265625, "completions/min_length": 0.0, "completions/min_terminated_length": 84.0, "epoch": 0.10986666666666667, "grad_norm": 0.04819627106189728, "learning_rate": 2.6944444444444444e-06, "loss": -0.0128, "num_tokens": 26851128.0, "reward": 1.1573998928070068, "reward_std": 0.2900735139846802, "rewards/accuracy_reward_step": 0.6015625, "rewards/final_brier_reward_step": 0.6051230430603027, "rewards/format_reward_step": 0.95703125, "rewards/stepwise_brier_reward": 0.6611952781677246, "step": 103 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.533690564178369, "calib/avg_num_step_conf": 1.01171875, "calib/ece": 0.46964705882352953, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.984313725490196, "calib/gap": 0.002819660014781844, "calib/mean_conf": 0.9845490196078432, "calib/mu_c": 0.9859090909090908, "calib/mu_w": 0.983089430894309, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.46827450980392166, "calib/std_conf": 0.036284450148352014, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 968.0, "completions/max_terminated_length": 968.0, "completions/mean_length": 304.296875, "completions/mean_terminated_length": 309.12701416015625, "completions/min_length": 0.0, "completions/min_terminated_length": 94.0, "epoch": 0.11093333333333333, "grad_norm": 0.04776488617062569, "learning_rate": 2.666666666666667e-06, "loss": -0.0339, "num_tokens": 27035708.0, "reward": 1.050512433052063, "reward_std": 0.3880224823951721, "rewards/accuracy_reward_step": 0.515625, "rewards/final_brier_reward_step": 0.5253597497940063, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.6337213516235352, "step": 104 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5634615384615385, "calib/avg_num_step_conf": 1.0, "calib/ece": 0.37539062500000003, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.98046875, "calib/gap": 0.01225128205128212, "calib/mean_conf": 0.9847656250000001, "calib/mu_c": 0.9895512820512821, "calib/mu_w": 0.9773, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.37539062500000003, "calib/std_conf": 0.02998606040078247, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 937.0, "completions/max_terminated_length": 937.0, "completions/mean_length": 311.53125, "completions/mean_terminated_length": 316.4761962890625, "completions/min_length": 0.0, "completions/min_terminated_length": 117.0, "epoch": 0.112, "grad_norm": 0.02812821790575981, "learning_rate": 2.6388888888888893e-06, "loss": -0.0442, "num_tokens": 27221220.0, "reward": 1.1907763481140137, "reward_std": 0.32648181915283203, "rewards/accuracy_reward_step": 0.609375, "rewards/final_brier_reward_step": 0.625900387763977, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.703610897064209, "step": 105 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.562754814024964, "calib/avg_num_step_conf": 1.0, "calib/ece": 0.4039453125000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.96484375, "calib/gap": 0.011610738255033448, "calib/mean_conf": 0.9767578125, "calib/mu_c": 0.9816107382550335, "calib/mu_w": 0.9700000000000001, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.39933593750000007, "calib/std_conf": 0.07247329746337504, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 830.0, "completions/max_terminated_length": 830.0, "completions/mean_length": 309.93359375, "completions/mean_terminated_length": 314.8531799316406, "completions/min_length": 0.0, "completions/min_terminated_length": 96.0, "epoch": 0.11306666666666666, "grad_norm": 0.03354250267148018, "learning_rate": 2.6111111111111113e-06, "loss": 0.0087, "num_tokens": 27405147.0, "reward": 1.1469286680221558, "reward_std": 0.20517534017562866, "rewards/accuracy_reward_step": 0.58203125, "rewards/final_brier_reward_step": 0.6007812023162842, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.6666207909584045, "step": 106 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5450822492945144, "calib/avg_num_step_conf": 0.9765625, "calib/ece": 0.32448818897637804, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.984251968503937, "calib/gap": 0.02012664326519409, "calib/mean_conf": 0.981968503937008, "calib/mu_c": 0.9888622754491018, "calib/mu_w": 0.9687356321839077, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.32448818897637804, "calib/std_conf": 0.05303452107185328, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2573.0, "completions/max_terminated_length": 2573.0, "completions/mean_length": 336.5546875, "completions/mean_terminated_length": 340.54547119140625, "completions/min_length": 0.0, "completions/min_terminated_length": 124.0, "epoch": 0.11413333333333334, "grad_norm": 0.02710435539484024, "learning_rate": 2.5833333333333337e-06, "loss": 0.0065, "num_tokens": 27595921.0, "reward": 1.2445693016052246, "reward_std": 0.3284000754356384, "rewards/accuracy_reward_step": 0.65234375, "rewards/final_brier_reward_step": 0.662137508392334, "rewards/format_reward_step": 0.97265625, "rewards/stepwise_brier_reward": 0.7341084480285645, "step": 107 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5637289741767354, "calib/avg_num_step_conf": 0.99609375, "calib/ece": 0.25152343749999995, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.984375, "calib/gap": 0.010803916923319834, "calib/mean_conf": 0.9823046875000001, "calib/mu_c": 0.9851322751322751, "calib/mu_w": 0.9743283582089552, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.24777343749999994, "calib/std_conf": 0.07027680300445761, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 999.0, "completions/max_terminated_length": 999.0, "completions/mean_length": 348.08984375, "completions/mean_terminated_length": 353.6151123046875, "completions/min_length": 0.0, "completions/min_terminated_length": 110.0, "epoch": 0.1152, "grad_norm": 0.03617968410253525, "learning_rate": 2.5555555555555557e-06, "loss": -0.0045, "num_tokens": 27788264.0, "reward": 1.3766645193099976, "reward_std": 0.2723163962364197, "rewards/accuracy_reward_step": 0.73828125, "rewards/final_brier_reward_step": 0.7463890314102173, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.8110500574111938, "step": 108 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5411080501989592, "calib/avg_num_step_conf": 1.0, "calib/ece": 0.454765625, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.97265625, "calib/gap": 0.015910621365166522, "calib/mean_conf": 0.9821093750000001, "calib/mu_c": 0.9896296296296294, "calib/mu_w": 0.9737190082644629, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.454765625, "calib/std_conf": 0.04779762585222591, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 939.0, "completions/max_terminated_length": 939.0, "completions/mean_length": 356.28515625, "completions/mean_terminated_length": 361.94049072265625, "completions/min_length": 0.0, "completions/min_terminated_length": 137.0, "epoch": 0.11626666666666667, "grad_norm": 0.027856063097715378, "learning_rate": 2.5277777777777778e-06, "loss": 0.0021, "num_tokens": 27984073.0, "reward": 1.0712552070617676, "reward_std": 0.20483094453811646, "rewards/accuracy_reward_step": 0.52734375, "rewards/final_brier_reward_step": 0.547326922416687, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.6322251558303833, "step": 109 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5226346277622828, "calib/avg_num_step_conf": 1.0, "calib/ece": 0.2964843750000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.9921875, "calib/gap": 0.007933919759708585, "calib/mean_conf": 0.9878906250000001, "calib/mu_c": 0.9903389830508474, "calib/mu_w": 0.9824050632911389, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.2964843750000001, "calib/std_conf": 0.028943704619647005, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 825.0, "completions/max_terminated_length": 825.0, "completions/mean_length": 333.375, "completions/mean_terminated_length": 338.66668701171875, "completions/min_length": 0.0, "completions/min_terminated_length": 116.0, "epoch": 0.11733333333333333, "grad_norm": 0.020607363432645798, "learning_rate": 2.5e-06, "loss": 0.0046, "num_tokens": 28174337.0, "reward": 1.3077452182769775, "reward_std": 0.29098767042160034, "rewards/accuracy_reward_step": 0.69140625, "rewards/final_brier_reward_step": 0.7009003758430481, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.7683620452880859, "step": 110 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5139521076588166, "calib/avg_num_step_conf": 0.984375, "calib/ece": 0.352734375, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.99609375, "calib/gap": 0.004376278118609522, "calib/mean_conf": 0.989453125, "calib/mu_c": 0.991042944785276, "calib/mu_w": 0.9866666666666665, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.352734375, "calib/std_conf": 0.02456841829940167, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 889.0, "completions/max_terminated_length": 889.0, "completions/mean_length": 326.3203125, "completions/mean_terminated_length": 331.5000305175781, "completions/min_length": 0.0, "completions/min_terminated_length": 134.0, "epoch": 0.1184, "grad_norm": 0.026003004983067513, "learning_rate": 2.4722222222222226e-06, "loss": 0.0115, "num_tokens": 28365283.0, "reward": 1.2241973876953125, "reward_std": 0.26709842681884766, "rewards/accuracy_reward_step": 0.63671875, "rewards/final_brier_reward_step": 0.6453027129173279, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.7241427898406982, "step": 111 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.576318928950159, "calib/avg_num_step_conf": 1.0078125, "calib/ece": 0.33925781250000003, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.96875, "calib/gap": 0.02577677624602326, "calib/mean_conf": 0.9798828125000001, "calib/mu_c": 0.9891463414634144, "calib/mu_w": 0.9633695652173911, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.33925781250000003, "calib/std_conf": 0.0637774599062227, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 985.0, "completions/max_terminated_length": 985.0, "completions/mean_length": 360.10546875, "completions/mean_terminated_length": 365.8214416503906, "completions/min_length": 0.0, "completions/min_terminated_length": 161.0, "epoch": 0.11946666666666667, "grad_norm": 0.02440224215388298, "learning_rate": 2.4444444444444447e-06, "loss": 0.0042, "num_tokens": 28565390.0, "reward": 1.2357304096221924, "reward_std": 0.2610101103782654, "rewards/accuracy_reward_step": 0.640625, "rewards/final_brier_reward_step": 0.6624808311462402, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.7179406881332397, "step": 112 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.563046949856276, "calib/avg_num_step_conf": 1.00390625, "calib/ece": 0.37347656250000005, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.96484375, "calib/gap": 0.02066113062919206, "calib/mean_conf": 0.9789453125, "calib/mu_c": 0.9870967741935484, "calib/mu_w": 0.9664356435643563, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.37347656250000005, "calib/std_conf": 0.06907919013188664, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 827.0, "completions/max_terminated_length": 827.0, "completions/mean_length": 294.93359375, "completions/mean_terminated_length": 299.6150817871094, "completions/min_length": 0.0, "completions/min_terminated_length": 111.0, "epoch": 0.12053333333333334, "grad_norm": 0.03255468234419823, "learning_rate": 2.4166666666666667e-06, "loss": 0.0091, "num_tokens": 28746093.0, "reward": 1.185679316520691, "reward_std": 0.2895708680152893, "rewards/accuracy_reward_step": 0.60546875, "rewards/final_brier_reward_step": 0.6267378926277161, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.6941040754318237, "step": 113 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4817150063051703, "calib/avg_num_step_conf": 1.0, "calib/ece": 0.22703124999999996, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.99609375, "calib/gap": 0.007182849936948177, "calib/mean_conf": 0.98875, "calib/mu_c": 0.9904615384615385, "calib/mu_w": 0.9832786885245903, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.22703124999999996, "calib/std_conf": 0.029448047473474365, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 951.0, "completions/max_terminated_length": 951.0, "completions/mean_length": 292.88671875, "completions/mean_terminated_length": 297.5357360839844, "completions/min_length": 0.0, "completions/min_terminated_length": 107.0, "epoch": 0.1216, "grad_norm": 0.021018045023083687, "learning_rate": 2.388888888888889e-06, "loss": 0.0135, "num_tokens": 28926096.0, "reward": 1.3907405138015747, "reward_std": 0.18657159805297852, "rewards/accuracy_reward_step": 0.76171875, "rewards/final_brier_reward_step": 0.768693745136261, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.7473931312561035, "step": 114 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.47942307692307695, "calib/avg_num_step_conf": 1.0078125, "calib/ece": 0.3816796875000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.9765625, "calib/gap": -0.005243589743589738, "calib/mean_conf": 0.9823046875000001, "calib/mu_c": 0.9802564102564102, "calib/mu_w": 0.9854999999999999, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.37730468750000007, "calib/std_conf": 0.053485549829158006, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 765.0, "completions/max_terminated_length": 765.0, "completions/mean_length": 299.89453125, "completions/mean_terminated_length": 304.65478515625, "completions/min_length": 0.0, "completions/min_terminated_length": 130.0, "epoch": 0.12266666666666666, "grad_norm": 0.02113684080541134, "learning_rate": 2.361111111111111e-06, "loss": -0.0278, "num_tokens": 29108133.0, "reward": 1.1730939149856567, "reward_std": 0.31790691614151, "rewards/accuracy_reward_step": 0.609375, "rewards/final_brier_reward_step": 0.6136327981948853, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.6451491117477417, "step": 115 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5217391304347826, "calib/avg_num_step_conf": 1.015625, "calib/ece": 0.3453515625000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.984375, "calib/gap": 0.011195652173913162, "calib/mean_conf": 0.9859765625000001, "calib/mu_c": 0.9899999999999999, "calib/mu_w": 0.9788043478260867, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3453515625000001, "calib/std_conf": 0.034047064127228265, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 852.0, "completions/max_terminated_length": 852.0, "completions/mean_length": 359.3125, "completions/mean_terminated_length": 365.0158996582031, "completions/min_length": 0.0, "completions/min_terminated_length": 99.0, "epoch": 0.12373333333333333, "grad_norm": 0.022788511589169502, "learning_rate": 2.3333333333333336e-06, "loss": 0.002, "num_tokens": 29304637.0, "reward": 1.2408850193023682, "reward_std": 0.3243008852005005, "rewards/accuracy_reward_step": 0.640625, "rewards/final_brier_reward_step": 0.6545034646987915, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.7465365529060364, "step": 116 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.49245002784825787, "calib/avg_num_step_conf": 1.00390625, "calib/ece": 0.4237499999999999, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.96484375, "calib/gap": -0.006080821833034378, "calib/mean_conf": 0.9749218750000002, "calib/mu_c": 0.972237762237762, "calib/mu_w": 0.9783185840707964, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.4200390624999999, "calib/std_conf": 0.07972115557669979, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 858.0, "completions/max_terminated_length": 858.0, "completions/mean_length": 335.1171875, "completions/mean_terminated_length": 340.4365234375, "completions/min_length": 0.0, "completions/min_terminated_length": 151.0, "epoch": 0.1248, "grad_norm": 0.019920989871025085, "learning_rate": 2.305555555555556e-06, "loss": 0.0054, "num_tokens": 29497027.0, "reward": 1.1118080615997314, "reward_std": 0.3299786448478699, "rewards/accuracy_reward_step": 0.55859375, "rewards/final_brier_reward_step": 0.5707499980926514, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.6421070098876953, "step": 117 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5703170970905524, "calib/avg_num_step_conf": 1.00390625, "calib/ece": 0.3369921875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.92578125, "calib/gap": 0.05509970578620471, "calib/mean_conf": 0.9567578125, "calib/mu_c": 0.9772049689440993, "calib/mu_w": 0.9221052631578945, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.332421875, "calib/std_conf": 0.11857238335807728, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 834.0, "completions/max_terminated_length": 834.0, "completions/mean_length": 320.484375, "completions/mean_terminated_length": 325.5714416503906, "completions/min_length": 0.0, "completions/min_terminated_length": 96.0, "epoch": 0.12586666666666665, "grad_norm": 0.015183635987341404, "learning_rate": 2.277777777777778e-06, "loss": 0.0124, "num_tokens": 29683079.0, "reward": 1.2166857719421387, "reward_std": 0.22400817275047302, "rewards/accuracy_reward_step": 0.62890625, "rewards/final_brier_reward_step": 0.6707894206047058, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.680328905582428, "step": 118 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5442301405688132, "calib/avg_num_step_conf": 1.01171875, "calib/ece": 0.3358984375000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.94921875, "calib/gap": 0.05902386400784598, "calib/mean_conf": 0.9648046875, "calib/mu_c": 0.9867080745341615, "calib/mu_w": 0.9276842105263156, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3358984375000001, "calib/std_conf": 0.12617247363441578, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1469.0, "completions/max_terminated_length": 1469.0, "completions/mean_length": 366.09375, "completions/mean_terminated_length": 371.90478515625, "completions/min_length": 0.0, "completions/min_terminated_length": 113.0, "epoch": 0.12693333333333334, "grad_norm": 0.013166883029043674, "learning_rate": 2.25e-06, "loss": 0.0059, "num_tokens": 29881863.0, "reward": 1.2195494174957275, "reward_std": 0.3772987127304077, "rewards/accuracy_reward_step": 0.62890625, "rewards/final_brier_reward_step": 0.6654199361801147, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.6971527338027954, "step": 119 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6127808471454881, "calib/avg_num_step_conf": 1.00390625, "calib/ece": 0.24960937500000002, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.90625, "calib/gap": 0.09296500920810302, "calib/mean_conf": 0.9490625000000001, "calib/mu_c": 0.9762983425414363, "calib/mu_w": 0.8833333333333333, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.24582031250000003, "calib/std_conf": 0.13450974349001635, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 953.0, "completions/max_terminated_length": 953.0, "completions/mean_length": 313.67578125, "completions/mean_terminated_length": 318.65478515625, "completions/min_length": 0.0, "completions/min_terminated_length": 151.0, "epoch": 0.128, "grad_norm": 0.015285477042198181, "learning_rate": 2.222222222222222e-06, "loss": -0.0113, "num_tokens": 30068852.0, "reward": 1.3414921760559082, "reward_std": 0.20560118556022644, "rewards/accuracy_reward_step": 0.70703125, "rewards/final_brier_reward_step": 0.7547031044960022, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.7831406593322754, "step": 120 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5843425605536332, "calib/avg_num_step_conf": 1.00390625, "calib/ece": 0.2356470588235293, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.9333333333333333, "calib/gap": 0.059505347593582614, "calib/mean_conf": 0.9611372549019608, "calib/mu_c": 0.9770053475935828, "calib/mu_w": 0.9175000000000002, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.2317254901960783, "calib/std_conf": 0.11671937514243685, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1688.0, "completions/max_terminated_length": 1688.0, "completions/mean_length": 360.75, "completions/mean_terminated_length": 366.4762268066406, "completions/min_length": 0.0, "completions/min_terminated_length": 95.0, "epoch": 0.12906666666666666, "grad_norm": 0.011736109852790833, "learning_rate": 2.1944444444444445e-06, "loss": 0.0006, "num_tokens": 30266260.0, "reward": 1.3648300170898438, "reward_std": 0.3142578601837158, "rewards/accuracy_reward_step": 0.73046875, "rewards/final_brier_reward_step": 0.7590675354003906, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.7861902713775635, "step": 121 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6031746031746033, "calib/avg_num_step_conf": 1.00390625, "calib/ece": 0.28274509803921566, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.9019607843137255, "calib/gap": 0.08356307435254828, "calib/mean_conf": 0.9462745098039216, "calib/mu_c": 0.9738011695906432, "calib/mu_w": 0.8902380952380949, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.27921568627450977, "calib/std_conf": 0.13888859669327203, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2479.0, "completions/max_terminated_length": 2479.0, "completions/mean_length": 334.4296875, "completions/mean_terminated_length": 338.395263671875, "completions/min_length": 0.0, "completions/min_terminated_length": 154.0, "epoch": 0.13013333333333332, "grad_norm": 0.017932435497641563, "learning_rate": 2.166666666666667e-06, "loss": 0.0297, "num_tokens": 30459218.0, "reward": 1.2807669639587402, "reward_std": 0.2884066104888916, "rewards/accuracy_reward_step": 0.66796875, "rewards/final_brier_reward_step": 0.7179101705551147, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.7371886968612671, "step": 122 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4874829001367989, "calib/avg_num_step_conf": 1.00390625, "calib/ece": 0.3243359375000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.9375, "calib/gap": -0.00967031463748258, "calib/mean_conf": 0.9616015625000001, "calib/mu_c": 0.9583529411764706, "calib/mu_w": 0.9680232558139532, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3109375000000001, "calib/std_conf": 0.11241391427469552, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1403.0, "completions/max_terminated_length": 1403.0, "completions/mean_length": 394.01953125, "completions/mean_terminated_length": 400.2738342285156, "completions/min_length": 0.0, "completions/min_terminated_length": 105.0, "epoch": 0.1312, "grad_norm": 0.012971178628504276, "learning_rate": 2.138888888888889e-06, "loss": -0.01, "num_tokens": 30665375.0, "reward": 1.2534246444702148, "reward_std": 0.2954699397087097, "rewards/accuracy_reward_step": 0.6640625, "rewards/final_brier_reward_step": 0.671435534954071, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.6860131025314331, "step": 123 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6613669590643274, "calib/avg_num_step_conf": 1.0, "calib/ece": 0.22710937500000006, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.859375, "calib/gap": 0.13600584795321624, "calib/mean_conf": 0.9302343750000001, "calib/mu_c": 0.9706111111111112, "calib/mu_w": 0.834605263157895, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.22710937500000006, "calib/std_conf": 0.15143823433452785, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1212.0, "completions/max_terminated_length": 1212.0, "completions/mean_length": 339.046875, "completions/mean_terminated_length": 344.4285888671875, "completions/min_length": 0.0, "completions/min_terminated_length": 96.0, "epoch": 0.13226666666666667, "grad_norm": 0.015886466950178146, "learning_rate": 2.1111111111111114e-06, "loss": -0.0014, "num_tokens": 30858987.0, "reward": 1.3479766845703125, "reward_std": 0.26100507378578186, "rewards/accuracy_reward_step": 0.703125, "rewards/final_brier_reward_step": 0.7735273241996765, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.8058797121047974, "step": 124 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5854402515723272, "calib/avg_num_step_conf": 1.01171875, "calib/ece": 0.3273437500000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.8125, "calib/gap": 0.08209056603773568, "calib/mean_conf": 0.909609375, "calib/mu_c": 0.9435999999999998, "calib/mu_w": 0.8615094339622641, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3255078125000001, "calib/std_conf": 0.17423001294871493, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1195.0, "completions/max_terminated_length": 1195.0, "completions/mean_length": 362.8671875, "completions/mean_terminated_length": 368.62701416015625, "completions/min_length": 0.0, "completions/min_terminated_length": 108.0, "epoch": 0.13333333333333333, "grad_norm": 0.014908002689480782, "learning_rate": 2.0833333333333334e-06, "loss": -0.0153, "num_tokens": 31056689.0, "reward": 1.1630195379257202, "reward_std": 0.3463001549243927, "rewards/accuracy_reward_step": 0.5859375, "rewards/final_brier_reward_step": 0.6620984077453613, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.6462297439575195, "step": 125 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6354560876941147, "calib/avg_num_step_conf": 1.02734375, "calib/ece": 0.2899999999999999, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.7803921568627451, "calib/gap": 0.0995302101004828, "calib/mean_conf": 0.9096078431372551, "calib/mu_c": 0.9474683544303797, "calib/mu_w": 0.8479381443298969, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.2899999999999999, "calib/std_conf": 0.16010487719988759, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1540.0, "completions/max_terminated_length": 1540.0, "completions/mean_length": 376.54296875, "completions/mean_terminated_length": 382.5198669433594, "completions/min_length": 0.0, "completions/min_terminated_length": 85.0, "epoch": 0.1344, "grad_norm": 0.024794060736894608, "learning_rate": 2.0555555555555555e-06, "loss": 0.0038, "num_tokens": 31258548.0, "reward": 1.2156885862350464, "reward_std": 0.2662501931190491, "rewards/accuracy_reward_step": 0.6171875, "rewards/final_brier_reward_step": 0.6987495422363281, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.699161171913147, "step": 126 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5325275157232704, "calib/avg_num_step_conf": 1.0078125, "calib/ece": 0.26372549019607844, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.6470588235294118, "calib/gap": 0.027211084905660154, "calib/mean_conf": 0.853529411764706, "calib/mu_c": 0.8637735849056604, "calib/mu_w": 0.8365625000000002, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.24686274509803924, "calib/std_conf": 0.1925293308890756, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 912.0, "completions/max_terminated_length": 912.0, "completions/mean_length": 325.46484375, "completions/mean_terminated_length": 330.6309814453125, "completions/min_length": 0.0, "completions/min_terminated_length": 118.0, "epoch": 0.13546666666666668, "grad_norm": 0.01905258744955063, "learning_rate": 2.027777777777778e-06, "loss": -0.0077, "num_tokens": 31445539.0, "reward": 1.202289342880249, "reward_std": 0.2972995638847351, "rewards/accuracy_reward_step": 0.62109375, "rewards/final_brier_reward_step": 0.6853792667388916, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.6433097124099731, "step": 127 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6043213401865601, "calib/avg_num_step_conf": 1.0078125, "calib/ece": 0.2910546875, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.73046875, "calib/gap": 0.07191890348372376, "calib/mean_conf": 0.8887109375000001, "calib/mu_c": 0.9176470588235295, "calib/mu_w": 0.8457281553398057, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.2910546875, "calib/std_conf": 0.1733389315556984, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1837.0, "completions/max_terminated_length": 1837.0, "completions/mean_length": 368.7265625, "completions/mean_terminated_length": 374.5793762207031, "completions/min_length": 0.0, "completions/min_terminated_length": 101.0, "epoch": 0.13653333333333334, "grad_norm": 0.020467208698391914, "learning_rate": 2.0000000000000003e-06, "loss": -0.0036, "num_tokens": 31646597.0, "reward": 1.1866086721420288, "reward_std": 0.3052266836166382, "rewards/accuracy_reward_step": 0.59765625, "rewards/final_brier_reward_step": 0.6792874932289124, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.6804282665252686, "step": 128 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6342788971367974, "calib/avg_num_step_conf": 1.0078125, "calib/ece": 0.29539062499999996, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.8515625, "calib/gap": 0.1035577942735948, "calib/mean_conf": 0.936015625, "calib/mu_c": 0.9732317073170731, "calib/mu_w": 0.8696739130434783, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.29539062499999996, "calib/std_conf": 0.1356370377362296, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1008.0, "completions/max_terminated_length": 1008.0, "completions/mean_length": 319.37109375, "completions/mean_terminated_length": 324.44049072265625, "completions/min_length": 0.0, "completions/min_terminated_length": 101.0, "epoch": 0.1376, "grad_norm": 0.016260705888271332, "learning_rate": 1.9722222222222224e-06, "loss": 0.0007, "num_tokens": 31830740.0, "reward": 1.249778151512146, "reward_std": 0.26067492365837097, "rewards/accuracy_reward_step": 0.640625, "rewards/final_brier_reward_step": 0.7118054628372192, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.7248071432113647, "step": 129 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6477610930735931, "calib/avg_num_step_conf": 1.0078125, "calib/ece": 0.29218750000000016, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.875, "calib/gap": 0.10833333333333328, "calib/mean_conf": 0.9435937500000001, "calib/mu_c": 0.9808333333333333, "calib/mu_w": 0.8725, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.28976562500000014, "calib/std_conf": 0.12703490154653366, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 728.0, "completions/max_terminated_length": 728.0, "completions/mean_length": 312.30859375, "completions/mean_terminated_length": 317.2658996582031, "completions/min_length": 0.0, "completions/min_terminated_length": 92.0, "epoch": 0.13866666666666666, "grad_norm": 0.013571816496551037, "learning_rate": 1.944444444444445e-06, "loss": -0.0122, "num_tokens": 32015979.0, "reward": 1.27852201461792, "reward_std": 0.17848068475723267, "rewards/accuracy_reward_step": 0.65625, "rewards/final_brier_reward_step": 0.7245867252349854, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.7645012140274048, "step": 130 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6097732843137256, "calib/avg_num_step_conf": 1.01953125, "calib/ece": 0.45964843749999995, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.796875, "calib/gap": 0.07297549019607852, "calib/mean_conf": 0.9283984375000001, "calib/mu_c": 0.9671666666666667, "calib/mu_w": 0.8941911764705882, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.45964843749999995, "calib/std_conf": 0.13437708369569043, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 747.0, "completions/max_terminated_length": 747.0, "completions/mean_length": 327.9296875, "completions/mean_terminated_length": 333.13494873046875, "completions/min_length": 0.0, "completions/min_terminated_length": 152.0, "epoch": 0.13973333333333332, "grad_norm": 0.019835758954286575, "learning_rate": 1.916666666666667e-06, "loss": -0.0224, "num_tokens": 32206137.0, "reward": 0.9983835816383362, "reward_std": 0.2633681893348694, "rewards/accuracy_reward_step": 0.46875, "rewards/final_brier_reward_step": 0.5579879283905029, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.5605465173721313, "step": 131 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6410154173312068, "calib/avg_num_step_conf": 1.00390625, "calib/ece": 0.18517647058823516, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.8862745098039215, "calib/gap": 0.06606326422115882, "calib/mean_conf": 0.9616470588235294, "calib/mu_c": 0.9764141414141413, "calib/mu_w": 0.9103508771929825, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.18517647058823516, "calib/std_conf": 0.08703184531648525, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2483.0, "completions/max_terminated_length": 2483.0, "completions/mean_length": 371.50390625, "completions/mean_terminated_length": 375.90911865234375, "completions/min_length": 0.0, "completions/min_terminated_length": 113.0, "epoch": 0.1408, "grad_norm": 0.013253241777420044, "learning_rate": 1.888888888888889e-06, "loss": 0.0162, "num_tokens": 32406834.0, "reward": 1.423046588897705, "reward_std": 0.33857858180999756, "rewards/accuracy_reward_step": 0.7734375, "rewards/final_brier_reward_step": 0.8023840188980103, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.8077710270881653, "step": 132 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6080108010801082, "calib/avg_num_step_conf": 1.01953125, "calib/ece": 0.3231764705882353, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.8, "calib/gap": 0.058024302430243035, "calib/mean_conf": 0.9247450980392157, "calib/mu_c": 0.9477272727272726, "calib/mu_w": 0.8897029702970296, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.32199999999999995, "calib/std_conf": 0.14430847177137726, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2035.0, "completions/max_terminated_length": 2035.0, "completions/mean_length": 415.50390625, "completions/mean_terminated_length": 420.43084716796875, "completions/min_length": 0.0, "completions/min_terminated_length": 85.0, "epoch": 0.14186666666666667, "grad_norm": 0.014483300037682056, "learning_rate": 1.8611111111111113e-06, "loss": -0.0098, "num_tokens": 32619547.0, "reward": 1.1818854808807373, "reward_std": 0.4062906503677368, "rewards/accuracy_reward_step": 0.6015625, "rewards/final_brier_reward_step": 0.6587026715278625, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.6704014539718628, "step": 133 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6919596354166666, "calib/avg_num_step_conf": 1.04296875, "calib/ece": 0.26738281250000007, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.65234375, "calib/gap": 0.12610416666666668, "calib/mean_conf": 0.8890234375000001, "calib/mu_c": 0.9363125, "calib/mu_w": 0.8102083333333333, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2657031250000001, "calib/std_conf": 0.15776089328690934, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1023.0, "completions/max_terminated_length": 1023.0, "completions/mean_length": 386.45703125, "completions/mean_terminated_length": 392.5912780761719, "completions/min_length": 0.0, "completions/min_terminated_length": 112.0, "epoch": 0.14293333333333333, "grad_norm": 0.014694559387862682, "learning_rate": 1.8333333333333333e-06, "loss": -0.0216, "num_tokens": 32827432.0, "reward": 1.2445417642593384, "reward_std": 0.3413343131542206, "rewards/accuracy_reward_step": 0.625, "rewards/final_brier_reward_step": 0.7301394939422607, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.7480278015136719, "step": 134 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6401930501930502, "calib/avg_num_step_conf": 1.01953125, "calib/ece": 0.19192156862745102, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.7137254901960784, "calib/gap": 0.07342471042471044, "calib/mean_conf": 0.9174117647058825, "calib/mu_c": 0.9375675675675677, "calib/mu_w": 0.8641428571428572, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.19192156862745102, "calib/std_conf": 0.12056552631702604, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1171.0, "completions/max_terminated_length": 1171.0, "completions/mean_length": 378.52734375, "completions/mean_terminated_length": 384.5357360839844, "completions/min_length": 0.0, "completions/min_terminated_length": 133.0, "epoch": 0.144, "grad_norm": 0.024551788344979286, "learning_rate": 1.8055555555555557e-06, "loss": -0.0083, "num_tokens": 33030215.0, "reward": 1.3647527694702148, "reward_std": 0.20473483204841614, "rewards/accuracy_reward_step": 0.72265625, "rewards/final_brier_reward_step": 0.7756797075271606, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.7966128587722778, "step": 135 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6559471658037966, "calib/avg_num_step_conf": 0.99609375, "calib/ece": 0.2877254901960785, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.7843137254901961, "calib/gap": 0.06526483472720013, "calib/mean_conf": 0.922, "calib/mu_c": 0.9458024691358025, "calib/mu_w": 0.8805376344086023, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.28721568627450994, "calib/std_conf": 0.13271050610774018, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1256.0, "completions/max_terminated_length": 1256.0, "completions/mean_length": 352.16796875, "completions/mean_terminated_length": 357.7579650878906, "completions/min_length": 0.0, "completions/min_terminated_length": 130.0, "epoch": 0.14506666666666668, "grad_norm": 0.021417800337076187, "learning_rate": 1.777777777777778e-06, "loss": 0.0198, "num_tokens": 33228858.0, "reward": 1.2192248106002808, "reward_std": 0.30028027296066284, "rewards/accuracy_reward_step": 0.6328125, "rewards/final_brier_reward_step": 0.6881940960884094, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.669173538684845, "step": 136 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6652343749999999, "calib/avg_num_step_conf": 1.01171875, "calib/ece": 0.3201171875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.87890625, "calib/gap": 0.0743125, "calib/mean_conf": 0.9439453125000001, "calib/mu_c": 0.9718125000000001, "calib/mu_w": 0.8975000000000001, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.31953125, "calib/std_conf": 0.13557048493044987, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1037.0, "completions/max_terminated_length": 1037.0, "completions/mean_length": 352.87890625, "completions/mean_terminated_length": 358.4801940917969, "completions/min_length": 0.0, "completions/min_terminated_length": 148.0, "epoch": 0.14613333333333334, "grad_norm": 0.011814955621957779, "learning_rate": 1.75e-06, "loss": -0.0004, "num_tokens": 33426179.0, "reward": 1.2254364490509033, "reward_std": 0.14266234636306763, "rewards/accuracy_reward_step": 0.625, "rewards/final_brier_reward_step": 0.6803535223007202, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.721392035484314, "step": 137 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5976331360946746, "calib/avg_num_step_conf": 0.98828125, "calib/ece": 0.293203125, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.90234375, "calib/gap": 0.06254641909814318, "calib/mean_conf": 0.9533593750000001, "calib/mu_c": 0.9746153846153844, "calib/mu_w": 0.9120689655172413, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.293203125, "calib/std_conf": 0.11556435804178283, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1091.0, "completions/max_terminated_length": 1091.0, "completions/mean_length": 364.7265625, "completions/mean_terminated_length": 370.5158996582031, "completions/min_length": 0.0, "completions/min_terminated_length": 77.0, "epoch": 0.1472, "grad_norm": 0.017470847815275192, "learning_rate": 1.7222222222222224e-06, "loss": 0.0029, "num_tokens": 33623885.0, "reward": 1.2584319114685059, "reward_std": 0.2366119623184204, "rewards/accuracy_reward_step": 0.66015625, "rewards/final_brier_reward_step": 0.6936300992965698, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.7111909985542297, "step": 138 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5763995167136529, "calib/avg_num_step_conf": 1.0, "calib/ece": 0.2256249999999999, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.92578125, "calib/gap": 0.05344663713250086, "calib/mean_conf": 0.9669531250000001, "calib/mu_c": 0.980523560209424, "calib/mu_w": 0.9270769230769231, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2232421874999999, "calib/std_conf": 0.08680580512116902, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 720.0, "completions/max_terminated_length": 720.0, "completions/mean_length": 322.30078125, "completions/mean_terminated_length": 327.41668701171875, "completions/min_length": 0.0, "completions/min_terminated_length": 99.0, "epoch": 0.14826666666666666, "grad_norm": 0.0096996258944273, "learning_rate": 1.6944444444444446e-06, "loss": -0.0035, "num_tokens": 33809490.0, "reward": 1.3900963068008423, "reward_std": 0.14743438363075256, "rewards/accuracy_reward_step": 0.74609375, "rewards/final_brier_reward_step": 0.7744976282119751, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.801512598991394, "step": 139 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.513157894736842, "calib/avg_num_step_conf": 1.0, "calib/ece": 0.2860937499999999, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.99609375, "calib/gap": 0.0026315789473686513, "calib/mean_conf": 0.98921875, "calib/mu_c": 0.99, "calib/mu_w": 0.9873684210526313, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2860937499999999, "calib/std_conf": 0.011865744748539802, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 762.0, "completions/max_terminated_length": 762.0, "completions/mean_length": 350.31640625, "completions/mean_terminated_length": 355.87701416015625, "completions/min_length": 0.0, "completions/min_terminated_length": 111.0, "epoch": 0.14933333333333335, "grad_norm": 0.010826703161001205, "learning_rate": 1.6666666666666667e-06, "loss": -0.0064, "num_tokens": 34004187.0, "reward": 1.3155367374420166, "reward_std": 0.17684701085090637, "rewards/accuracy_reward_step": 0.703125, "rewards/final_brier_reward_step": 0.7103679180145264, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.7392792701721191, "step": 140 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5848929953407564, "calib/avg_num_step_conf": 1.00390625, "calib/ece": 0.23175781249999997, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.94921875, "calib/gap": 0.058680407486377595, "calib/mean_conf": 0.9700390625000002, "calib/mu_c": 0.9853968253968255, "calib/mu_w": 0.9267164179104479, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.23175781249999997, "calib/std_conf": 0.095694411274228, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 979.0, "completions/max_terminated_length": 979.0, "completions/mean_length": 366.62890625, "completions/mean_terminated_length": 372.44842529296875, "completions/min_length": 0.0, "completions/min_terminated_length": 121.0, "epoch": 0.1504, "grad_norm": 0.010468649677932262, "learning_rate": 1.638888888888889e-06, "loss": -0.0045, "num_tokens": 34205140.0, "reward": 1.3810522556304932, "reward_std": 0.1601087749004364, "rewards/accuracy_reward_step": 0.73828125, "rewards/final_brier_reward_step": 0.7665854692459106, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.8044988512992859, "step": 141 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.569258064516129, "calib/avg_num_step_conf": 0.99609375, "calib/ece": 0.35909803921568645, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.9568627450980393, "calib/gap": 0.05797741935483869, "calib/mean_conf": 0.9669411764705882, "calib/mu_c": 0.9896774193548386, "calib/mu_w": 0.9316999999999999, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.35909803921568645, "calib/std_conf": 0.11005549611930746, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2213.0, "completions/max_terminated_length": 2213.0, "completions/mean_length": 380.06640625, "completions/mean_terminated_length": 384.5731506347656, "completions/min_length": 0.0, "completions/min_terminated_length": 129.0, "epoch": 0.15146666666666667, "grad_norm": 0.012077374383807182, "learning_rate": 1.6111111111111113e-06, "loss": -0.0117, "num_tokens": 34407597.0, "reward": 1.184699296951294, "reward_std": 0.2489548921585083, "rewards/accuracy_reward_step": 0.60546875, "rewards/final_brier_reward_step": 0.6456745862960815, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.675153911113739, "step": 142 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5971639042357274, "calib/avg_num_step_conf": 1.0234375, "calib/ece": 0.273671875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.96875, "calib/gap": 0.029658931860036875, "calib/mean_conf": 0.980703125, "calib/mu_c": 0.9893922651933701, "calib/mu_w": 0.9597333333333332, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.273671875, "calib/std_conf": 0.049421395824423786, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 896.0, "completions/max_terminated_length": 896.0, "completions/mean_length": 338.8671875, "completions/mean_terminated_length": 344.2460632324219, "completions/min_length": 0.0, "completions/min_terminated_length": 137.0, "epoch": 0.15253333333333333, "grad_norm": 0.020322734490036964, "learning_rate": 1.5833333333333333e-06, "loss": 0.0028, "num_tokens": 34601683.0, "reward": 1.3220750093460083, "reward_std": 0.21661823987960815, "rewards/accuracy_reward_step": 0.70703125, "rewards/final_brier_reward_step": 0.7278101444244385, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.7323650121688843, "step": 143 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5573718708047066, "calib/avg_num_step_conf": 0.99609375, "calib/ece": 0.23898437500000005, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.96875, "calib/gap": 0.048454552633656944, "calib/mean_conf": 0.9772656250000001, "calib/mu_c": 0.9899470899470898, "calib/mu_w": 0.9414925373134329, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.23898437500000005, "calib/std_conf": 0.0710876972011288, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1394.0, "completions/max_terminated_length": 1394.0, "completions/mean_length": 363.5234375, "completions/mean_terminated_length": 369.2936706542969, "completions/min_length": 0.0, "completions/min_terminated_length": 76.0, "epoch": 0.1536, "grad_norm": 0.013965306803584099, "learning_rate": 1.5555555555555558e-06, "loss": -0.0281, "num_tokens": 34798873.0, "reward": 1.3738117218017578, "reward_std": 0.3135017156600952, "rewards/accuracy_reward_step": 0.73828125, "rewards/final_brier_reward_step": 0.7594300508499146, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.7865979671478271, "step": 144 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5660472486315182, "calib/avg_num_step_conf": 1.0, "calib/ece": 0.2741015625000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.9375, "calib/gap": 0.04783491789109795, "calib/mean_conf": 0.9694140625000001, "calib/mu_c": 0.9839887640449438, "calib/mu_w": 0.9361538461538459, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2741015625000001, "calib/std_conf": 0.09136576794536394, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 967.0, "completions/max_terminated_length": 967.0, "completions/mean_length": 361.81640625, "completions/mean_terminated_length": 367.5595397949219, "completions/min_length": 0.0, "completions/min_terminated_length": 110.0, "epoch": 0.15466666666666667, "grad_norm": 0.015777001157402992, "learning_rate": 1.527777777777778e-06, "loss": 0.0139, "num_tokens": 34994202.0, "reward": 1.3016703128814697, "reward_std": 0.31495898962020874, "rewards/accuracy_reward_step": 0.6953125, "rewards/final_brier_reward_step": 0.7249355316162109, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.7004954814910889, "step": 145 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5739130434782609, "calib/avg_num_step_conf": 0.99609375, "calib/ece": 0.42062745098039234, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.9490196078431372, "calib/gap": 0.04513043478260881, "calib/mean_conf": 0.9696470588235295, "calib/mu_c": 0.99, "calib/mu_w": 0.9448695652173912, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.42062745098039234, "calib/std_conf": 0.09128426265790035, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2239.0, "completions/max_terminated_length": 2239.0, "completions/mean_length": 386.55078125, "completions/mean_terminated_length": 391.1343994140625, "completions/min_length": 0.0, "completions/min_terminated_length": 129.0, "epoch": 0.15573333333333333, "grad_norm": 0.014052913524210453, "learning_rate": 1.5e-06, "loss": 0.0087, "num_tokens": 35200375.0, "reward": 1.097890019416809, "reward_std": 0.30348002910614014, "rewards/accuracy_reward_step": 0.546875, "rewards/final_brier_reward_step": 0.5871105194091797, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.6247615814208984, "step": 146 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5906492699596146, "calib/avg_num_step_conf": 1.0234375, "calib/ece": 0.3974609375, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.91796875, "calib/gap": 0.056446722584654, "calib/mean_conf": 0.9580078125, "calib/mu_c": 0.9824827586206897, "calib/mu_w": 0.9260360360360357, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.39453125, "calib/std_conf": 0.11101597548985839, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1196.0, "completions/max_terminated_length": 1196.0, "completions/mean_length": 360.828125, "completions/mean_terminated_length": 366.5555725097656, "completions/min_length": 0.0, "completions/min_terminated_length": 125.0, "epoch": 0.1568, "grad_norm": 0.011779509484767914, "learning_rate": 1.4722222222222225e-06, "loss": 0.0016, "num_tokens": 35396427.0, "reward": 1.1296390295028687, "reward_std": 0.28417152166366577, "rewards/accuracy_reward_step": 0.56640625, "rewards/final_brier_reward_step": 0.6164590120315552, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.6364718079566956, "step": 147 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5446945637960022, "calib/avg_num_step_conf": 1.0, "calib/ece": 0.1909803921568628, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.9764705882352941, "calib/gap": 0.025635157855408153, "calib/mean_conf": 0.9831372549019607, "calib/mu_c": 0.9884653465346536, "calib/mu_w": 0.9628301886792454, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.1909803921568628, "calib/std_conf": 0.046462727886559946, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 807.0, "completions/max_terminated_length": 807.0, "completions/mean_length": 328.30859375, "completions/mean_terminated_length": 333.5198669433594, "completions/min_length": 0.0, "completions/min_terminated_length": 88.0, "epoch": 0.15786666666666666, "grad_norm": 0.01572262868285179, "learning_rate": 1.4444444444444445e-06, "loss": 0.0119, "num_tokens": 35585586.0, "reward": 1.4426770210266113, "reward_std": 0.1526944637298584, "rewards/accuracy_reward_step": 0.7890625, "rewards/final_brier_reward_step": 0.802019476890564, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.8163450956344604, "step": 148 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5901232913428036, "calib/avg_num_step_conf": 0.99609375, "calib/ece": 0.32039215686274514, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.9294117647058824, "calib/gap": 0.06119002948271213, "calib/mean_conf": 0.9635294117647059, "calib/mu_c": 0.9853658536585364, "calib/mu_w": 0.9241758241758242, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.32039215686274514, "calib/std_conf": 0.09711572901040597, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2831.0, "completions/max_terminated_length": 2831.0, "completions/mean_length": 409.1640625, "completions/mean_terminated_length": 414.0158386230469, "completions/min_length": 0.0, "completions/min_terminated_length": 112.0, "epoch": 0.15893333333333334, "grad_norm": 0.010849043726921082, "learning_rate": 1.4166666666666667e-06, "loss": -0.0003, "num_tokens": 35794788.0, "reward": 1.2346031665802002, "reward_std": 0.22302758693695068, "rewards/accuracy_reward_step": 0.640625, "rewards/final_brier_reward_step": 0.6838117241859436, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.696007490158081, "step": 149 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6152935441186712, "calib/avg_num_step_conf": 0.9921875, "calib/ece": 0.2699609375, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.89453125, "calib/gap": 0.07624556027578533, "calib/mean_conf": 0.9457421875, "calib/mu_c": 0.9704624277456647, "calib/mu_w": 0.8942168674698794, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.2699609375, "calib/std_conf": 0.14041315966715812, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1523.0, "completions/max_terminated_length": 1523.0, "completions/mean_length": 330.5234375, "completions/mean_terminated_length": 335.7698669433594, "completions/min_length": 0.0, "completions/min_terminated_length": 93.0, "epoch": 0.16, "grad_norm": 0.021546130999922752, "learning_rate": 1.3888888888888892e-06, "loss": -0.0018, "num_tokens": 35984362.0, "reward": 1.2923448085784912, "reward_std": 0.24288509786128998, "rewards/accuracy_reward_step": 0.67578125, "rewards/final_brier_reward_step": 0.7193816304206848, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.7546848058700562, "step": 150 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5515151515151515, "calib/avg_num_step_conf": 0.984375, "calib/ece": 0.3213833992094863, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.8972332015810277, "calib/gap": 0.01765909090909057, "calib/mean_conf": 0.9527667984189724, "calib/mu_c": 0.9589090909090907, "calib/mu_w": 0.9412500000000001, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.3109881422924902, "calib/std_conf": 0.11751819707896556, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2503.0, "completions/max_terminated_length": 2503.0, "completions/mean_length": 386.18359375, "completions/mean_terminated_length": 390.7628479003906, "completions/min_length": 0.0, "completions/min_terminated_length": 115.0, "epoch": 0.16106666666666666, "grad_norm": 0.018017679452896118, "learning_rate": 1.3611111111111112e-06, "loss": 0.0017, "num_tokens": 36190249.0, "reward": 1.2281548976898193, "reward_std": 0.21985679864883423, "rewards/accuracy_reward_step": 0.64453125, "rewards/final_brier_reward_step": 0.6665081977844238, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.691423773765564, "step": 151 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5462731549544253, "calib/avg_num_step_conf": 1.00390625, "calib/ece": 0.27741176470588247, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.9294117647058824, "calib/gap": 0.037375036753896085, "calib/mean_conf": 0.952156862745098, "calib/mu_c": 0.963296089385475, "calib/mu_w": 0.9259210526315789, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.26380392156862753, "calib/std_conf": 0.15164604977936957, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1167.0, "completions/max_terminated_length": 1167.0, "completions/mean_length": 343.5625, "completions/mean_terminated_length": 349.0158996582031, "completions/min_length": 0.0, "completions/min_terminated_length": 103.0, "epoch": 0.16213333333333332, "grad_norm": 0.015075085684657097, "learning_rate": 1.3333333333333334e-06, "loss": 0.0132, "num_tokens": 36383593.0, "reward": 1.3103210926055908, "reward_std": 0.3125706613063812, "rewards/accuracy_reward_step": 0.69921875, "rewards/final_brier_reward_step": 0.7180163860321045, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.7302993535995483, "step": 152 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5365125535823638, "calib/avg_num_step_conf": 0.984375, "calib/ece": 0.2637254901960784, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.9333333333333333, "calib/gap": 0.015701163502755744, "calib/mean_conf": 0.9630196078431372, "calib/mu_c": 0.967391304347826, "calib/mu_w": 0.9516901408450703, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.25258823529411756, "calib/std_conf": 0.10326768759784928, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 1556.0, "completions/max_terminated_length": 1556.0, "completions/mean_length": 352.6484375, "completions/mean_terminated_length": 359.6733093261719, "completions/min_length": 0.0, "completions/min_terminated_length": 122.0, "epoch": 0.1632, "grad_norm": 0.01362534798681736, "learning_rate": 1.3055555555555556e-06, "loss": 0.0097, "num_tokens": 36581191.0, "reward": 1.3346046209335327, "reward_std": 0.2605186700820923, "rewards/accuracy_reward_step": 0.71875, "rewards/final_brier_reward_step": 0.7253695130348206, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.7536742687225342, "step": 153 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5499844672258466, "calib/avg_num_step_conf": 0.99609375, "calib/ece": 0.3982812499999999, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.92578125, "calib/gap": 0.03995402298850581, "calib/mean_conf": 0.9592968750000002, "calib/mu_c": 0.9766206896551725, "calib/mu_w": 0.9366666666666666, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.39558593749999993, "calib/std_conf": 0.1105220820254232, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 911.0, "completions/max_terminated_length": 911.0, "completions/mean_length": 352.546875, "completions/mean_terminated_length": 358.14288330078125, "completions/min_length": 0.0, "completions/min_terminated_length": 119.0, "epoch": 0.16426666666666667, "grad_norm": 0.014393088407814503, "learning_rate": 1.2777777777777779e-06, "loss": 0.0071, "num_tokens": 36775883.0, "reward": 1.118644118309021, "reward_std": 0.24269017577171326, "rewards/accuracy_reward_step": 0.56640625, "rewards/final_brier_reward_step": 0.6040284633636475, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.608829140663147, "step": 154 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5624725584896193, "calib/avg_num_step_conf": 0.9921875, "calib/ece": 0.38835937500000006, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.94921875, "calib/gap": 0.04675531581258241, "calib/mean_conf": 0.9703906250000002, "calib/mu_c": 0.9899328859060402, "calib/mu_w": 0.9431775700934578, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.38835937500000006, "calib/std_conf": 0.08832559035811406, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1011.0, "completions/max_terminated_length": 1011.0, "completions/mean_length": 313.83203125, "completions/mean_terminated_length": 318.8135070800781, "completions/min_length": 0.0, "completions/min_terminated_length": 102.0, "epoch": 0.16533333333333333, "grad_norm": 0.021846825256943703, "learning_rate": 1.25e-06, "loss": 0.0124, "num_tokens": 36963440.0, "reward": 1.1388394832611084, "reward_std": 0.3869188725948334, "rewards/accuracy_reward_step": 0.58203125, "rewards/final_brier_reward_step": 0.6130414009094238, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.6220041513442993, "step": 155 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5690373563218389, "calib/avg_num_step_conf": 0.9921875, "calib/ece": 0.27799212598425216, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.905511811023622, "calib/gap": 0.030556034482758454, "calib/mean_conf": 0.9583070866141733, "calib/mu_c": 0.9679310344827585, "calib/mu_w": 0.9373750000000001, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.27562992125984276, "calib/std_conf": 0.10706117257992902, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1734.0, "completions/max_terminated_length": 1734.0, "completions/mean_length": 359.17578125, "completions/mean_terminated_length": 364.87701416015625, "completions/min_length": 0.0, "completions/min_terminated_length": 119.0, "epoch": 0.1664, "grad_norm": 0.018562057986855507, "learning_rate": 1.2222222222222223e-06, "loss": 0.0015, "num_tokens": 37160149.0, "reward": 1.2960621118545532, "reward_std": 0.2763296365737915, "rewards/accuracy_reward_step": 0.6796875, "rewards/final_brier_reward_step": 0.7057308554649353, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.7675800323486328, "step": 156 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5499881544657663, "calib/avg_num_step_conf": 1.0, "calib/ece": 0.2316796874999999, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.93359375, "calib/gap": 0.03602069019979448, "calib/mean_conf": 0.9680859375, "calib/mu_c": 0.9775132275132276, "calib/mu_w": 0.9414925373134331, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2307421874999999, "calib/std_conf": 0.08449209720291058, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 894.0, "completions/max_terminated_length": 894.0, "completions/mean_length": 354.03515625, "completions/mean_terminated_length": 359.65478515625, "completions/min_length": 0.0, "completions/min_terminated_length": 97.0, "epoch": 0.16746666666666668, "grad_norm": 0.01100405678153038, "learning_rate": 1.1944444444444446e-06, "loss": -0.0072, "num_tokens": 37354510.0, "reward": 1.3753619194030762, "reward_std": 0.25061172246932983, "rewards/accuracy_reward_step": 0.73828125, "rewards/final_brier_reward_step": 0.7607488036155701, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.7875739336013794, "step": 157 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5573757763975156, "calib/avg_num_step_conf": 1.0, "calib/ece": 0.2553543307086614, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.9488188976377953, "calib/gap": 0.03207142857142853, "calib/mean_conf": 0.9686614173228347, "calib/mu_c": 0.9775, "calib/mu_w": 0.9454285714285715, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.24980314960629924, "calib/std_conf": 0.09961628906363702, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2226.0, "completions/max_terminated_length": 2226.0, "completions/mean_length": 337.45703125, "completions/mean_terminated_length": 342.8135070800781, "completions/min_length": 0.0, "completions/min_terminated_length": 109.0, "epoch": 0.16853333333333334, "grad_norm": 0.017817601561546326, "learning_rate": 1.1666666666666668e-06, "loss": -0.0007, "num_tokens": 37546139.0, "reward": 1.3404242992401123, "reward_std": 0.2871135473251343, "rewards/accuracy_reward_step": 0.71875, "rewards/final_brier_reward_step": 0.7377734184265137, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.7567362785339355, "step": 158 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5081527347781217, "calib/avg_num_step_conf": 1.0, "calib/ece": 0.31234375000000003, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.9453125, "calib/gap": 0.0016952184382525992, "calib/mean_conf": 0.9712500000000002, "calib/mu_c": 0.971812865497076, "calib/mu_w": 0.9701176470588234, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.30781250000000004, "calib/std_conf": 0.07885409786434691, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 840.0, "completions/max_terminated_length": 840.0, "completions/mean_length": 328.06640625, "completions/mean_terminated_length": 333.2738342285156, "completions/min_length": 0.0, "completions/min_terminated_length": 111.0, "epoch": 0.1696, "grad_norm": 0.012218823656439781, "learning_rate": 1.138888888888889e-06, "loss": 0.0057, "num_tokens": 37734908.0, "reward": 1.2784302234649658, "reward_std": 0.27361583709716797, "rewards/accuracy_reward_step": 0.66796875, "rewards/final_brier_reward_step": 0.6807679533958435, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.7610776424407959, "step": 159 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6176856851014154, "calib/avg_num_step_conf": 0.99609375, "calib/ece": 0.26082352941176473, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.9019607843137255, "calib/gap": 0.07535896687582111, "calib/mean_conf": 0.9549411764705882, "calib/mu_c": 0.9776966292134831, "calib/mu_w": 0.902337662337662, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.25886274509803925, "calib/std_conf": 0.11060571409639187, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 991.0, "completions/max_terminated_length": 991.0, "completions/mean_length": 319.0546875, "completions/mean_terminated_length": 324.11907958984375, "completions/min_length": 0.0, "completions/min_terminated_length": 94.0, "epoch": 0.17066666666666666, "grad_norm": 0.06428356468677521, "learning_rate": 1.111111111111111e-06, "loss": -0.0086, "num_tokens": 37921426.0, "reward": 1.317893624305725, "reward_std": 0.26460549235343933, "rewards/accuracy_reward_step": 0.6953125, "rewards/final_brier_reward_step": 0.7398542761802673, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.7543764114379883, "step": 160 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6123531640773019, "calib/avg_num_step_conf": 0.99609375, "calib/ece": 0.17858823529411774, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.9490196078431372, "calib/gap": 0.08916635089048863, "calib/mean_conf": 0.9671372549019608, "calib/mu_c": 0.9853201970443349, "calib/mu_w": 0.8961538461538463, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.17482352941176482, "calib/std_conf": 0.10710834163297644, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1006.0, "completions/max_terminated_length": 1006.0, "completions/mean_length": 314.2890625, "completions/mean_terminated_length": 319.2778015136719, "completions/min_length": 0.0, "completions/min_terminated_length": 108.0, "epoch": 0.17173333333333332, "grad_norm": 0.0163392536342144, "learning_rate": 1.0833333333333335e-06, "loss": -0.0102, "num_tokens": 38105804.0, "reward": 1.4518227577209473, "reward_std": 0.16481463611125946, "rewards/accuracy_reward_step": 0.79296875, "rewards/final_brier_reward_step": 0.8226531147956848, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.8166691064834595, "step": 161 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6603683846419923, "calib/avg_num_step_conf": 0.99609375, "calib/ece": 0.19941176470588245, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.9137254901960784, "calib/gap": 0.10326703562781026, "calib/mean_conf": 0.9598823529411764, "calib/mu_c": 0.9837755102040816, "calib/mu_w": 0.8805084745762713, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.19533333333333344, "calib/std_conf": 0.10661818390669438, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 784.0, "completions/max_terminated_length": 784.0, "completions/mean_length": 299.15625, "completions/mean_terminated_length": 303.90478515625, "completions/min_length": 0.0, "completions/min_terminated_length": 104.0, "epoch": 0.1728, "grad_norm": 0.0195932537317276, "learning_rate": 1.0555555555555557e-06, "loss": -0.0195, "num_tokens": 38286532.0, "reward": 1.423150897026062, "reward_std": 0.22289392352104187, "rewards/accuracy_reward_step": 0.765625, "rewards/final_brier_reward_step": 0.807776927947998, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.82623291015625, "step": 162 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6668481219379423, "calib/avg_num_step_conf": 1.00390625, "calib/ece": 0.2699215686274509, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.8431372549019608, "calib/gap": 0.13229722373434927, "calib/mean_conf": 0.9248235294117646, "calib/mu_c": 0.9704790419161676, "calib/mu_w": 0.8381818181818184, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.2699215686274509, "calib/std_conf": 0.1701621724607524, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 1324.0, "completions/max_terminated_length": 1324.0, "completions/mean_length": 374.8671875, "completions/mean_terminated_length": 379.312255859375, "completions/min_length": 0.0, "completions/min_terminated_length": 103.0, "epoch": 0.17386666666666667, "grad_norm": 0.015320651233196259, "learning_rate": 1.0277777777777777e-06, "loss": -0.0221, "num_tokens": 38487330.0, "reward": 1.2625629901885986, "reward_std": 0.31058937311172485, "rewards/accuracy_reward_step": 0.65234375, "rewards/final_brier_reward_step": 0.7291222810745239, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.7156611680984497, "step": 163 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6693440428380187, "calib/avg_num_step_conf": 0.99609375, "calib/ece": 0.2948437499999999, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.87109375, "calib/gap": 0.1164390896921017, "calib/mean_conf": 0.9432812500000001, "calib/mu_c": 0.9842168674698795, "calib/mu_w": 0.8677777777777778, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.2948437499999999, "calib/std_conf": 0.12630326459928698, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1086.0, "completions/max_terminated_length": 1086.0, "completions/mean_length": 387.26953125, "completions/mean_terminated_length": 393.41668701171875, "completions/min_length": 0.0, "completions/min_terminated_length": 119.0, "epoch": 0.17493333333333333, "grad_norm": 0.018808778375387192, "learning_rate": 1.0000000000000002e-06, "loss": -0.0171, "num_tokens": 38692607.0, "reward": 1.2607839107513428, "reward_std": 0.2697381377220154, "rewards/accuracy_reward_step": 0.6484375, "rewards/final_brier_reward_step": 0.7221589684486389, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.7311328649520874, "step": 164 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5689794146825397, "calib/avg_num_step_conf": 1.0, "calib/ece": 0.3952734375000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.890625, "calib/gap": 0.04846230158730169, "calib/mean_conf": 0.9551171875000001, "calib/mu_c": 0.9763194444444445, "calib/mu_w": 0.9278571428571428, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3939453125000001, "calib/std_conf": 0.10406196479545175, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1019.0, "completions/max_terminated_length": 1019.0, "completions/mean_length": 378.29296875, "completions/mean_terminated_length": 384.2976379394531, "completions/min_length": 0.0, "completions/min_terminated_length": 124.0, "epoch": 0.176, "grad_norm": 0.015589786693453789, "learning_rate": 9.722222222222224e-07, "loss": -0.0006, "num_tokens": 38895026.0, "reward": 1.126084327697754, "reward_std": 0.2573801875114441, "rewards/accuracy_reward_step": 0.5625, "rewards/final_brier_reward_step": 0.6127816438674927, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.641555666923523, "step": 165 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6028143274853801, "calib/avg_num_step_conf": 0.99609375, "calib/ece": 0.268671875, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.953125, "calib/gap": 0.06056725146198838, "calib/mean_conf": 0.9717968750000001, "calib/mu_c": 0.9897777777777778, "calib/mu_w": 0.9292105263157894, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.268671875, "calib/std_conf": 0.083183096180861, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 925.0, "completions/max_terminated_length": 925.0, "completions/mean_length": 362.3984375, "completions/mean_terminated_length": 368.15081787109375, "completions/min_length": 0.0, "completions/min_terminated_length": 113.0, "epoch": 0.17706666666666668, "grad_norm": 0.012876220047473907, "learning_rate": 9.444444444444445e-07, "loss": 0.0251, "num_tokens": 39093984.0, "reward": 1.3327629566192627, "reward_std": 0.1636142134666443, "rewards/accuracy_reward_step": 0.703125, "rewards/final_brier_reward_step": 0.7372105121612549, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.7852473855018616, "step": 166 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.543727408974698, "calib/avg_num_step_conf": 1.0, "calib/ece": 0.20011718750000007, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.9609375, "calib/gap": 0.027591466102441786, "calib/mean_conf": 0.9728515625000002, "calib/mu_c": 0.9789949748743718, "calib/mu_w": 0.95140350877193, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.19781250000000006, "calib/std_conf": 0.0840809310504385, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1410.0, "completions/max_terminated_length": 1410.0, "completions/mean_length": 386.5390625, "completions/mean_terminated_length": 392.67462158203125, "completions/min_length": 0.0, "completions/min_terminated_length": 111.0, "epoch": 0.17813333333333334, "grad_norm": 0.01270974613726139, "learning_rate": 9.166666666666666e-07, "loss": -0.0092, "num_tokens": 39298546.0, "reward": 1.4282212257385254, "reward_std": 0.23805132508277893, "rewards/accuracy_reward_step": 0.77734375, "rewards/final_brier_reward_step": 0.7911777496337891, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.8123321533203125, "step": 167 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.51925, "calib/avg_num_step_conf": 0.99609375, "calib/ece": 0.2987843137254903, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.9764705882352941, "calib/gap": 0.013542857142856857, "calib/mean_conf": 0.9812941176470589, "calib/mu_c": 0.9855428571428569, "calib/mu_w": 0.9720000000000001, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.2969019607843138, "calib/std_conf": 0.057601502607595445, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2376.0, "completions/max_terminated_length": 2376.0, "completions/mean_length": 395.890625, "completions/mean_terminated_length": 400.5849914550781, "completions/min_length": 0.0, "completions/min_terminated_length": 118.0, "epoch": 0.1792, "grad_norm": 0.014562560245394707, "learning_rate": 8.88888888888889e-07, "loss": 0.0117, "num_tokens": 39504566.0, "reward": 1.287435531616211, "reward_std": 0.3017340302467346, "rewards/accuracy_reward_step": 0.68359375, "rewards/final_brier_reward_step": 0.6974402070045471, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.7218332886695862, "step": 168 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6397849462365591, "calib/avg_num_step_conf": 1.01953125, "calib/ece": 0.30558593750000007, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.9140625, "calib/gap": 0.13129032258064532, "calib/mean_conf": 0.9423046875000001, "calib/mu_c": 0.99, "calib/mu_w": 0.8587096774193547, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.30558593750000007, "calib/std_conf": 0.1669503602587528, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1074.0, "completions/max_terminated_length": 1074.0, "completions/mean_length": 391.8359375, "completions/mean_terminated_length": 398.0555725097656, "completions/min_length": 0.0, "completions/min_terminated_length": 122.0, "epoch": 0.18026666666666666, "grad_norm": 0.01971629448235035, "learning_rate": 8.611111111111112e-07, "loss": -0.0115, "num_tokens": 39709060.0, "reward": 1.24666166305542, "reward_std": 0.22210454940795898, "rewards/accuracy_reward_step": 0.63671875, "rewards/final_brier_reward_step": 0.7081738114356995, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.7315980195999146, "step": 169 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5796996299789595, "calib/avg_num_step_conf": 1.0078125, "calib/ece": 0.26749999999999996, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.8984375, "calib/gap": 0.06291518537328622, "calib/mean_conf": 0.93671875, "calib/mu_c": 0.9556424581005587, "calib/mu_w": 0.8927272727272725, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.25249999999999995, "calib/std_conf": 0.1745132040804864, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 1162.0, "completions/max_terminated_length": 1162.0, "completions/mean_length": 374.12890625, "completions/mean_terminated_length": 381.5816955566406, "completions/min_length": 0.0, "completions/min_terminated_length": 109.0, "epoch": 0.18133333333333335, "grad_norm": 0.016197988763451576, "learning_rate": 8.333333333333333e-07, "loss": 0.0014, "num_tokens": 39908989.0, "reward": 1.3212825059890747, "reward_std": 0.16238829493522644, "rewards/accuracy_reward_step": 0.69921875, "rewards/final_brier_reward_step": 0.7292906045913696, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.7589644193649292, "step": 170 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.547026149909505, "calib/avg_num_step_conf": 1.00390625, "calib/ece": 0.3962109375, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.9609375, "calib/gap": 0.03829432690507417, "calib/mean_conf": 0.9704296875, "calib/mu_c": 0.9867346938775511, "calib/mu_w": 0.9484403669724769, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3962109375, "calib/std_conf": 0.10364174952041452, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 866.0, "completions/max_terminated_length": 866.0, "completions/mean_length": 366.94921875, "completions/mean_terminated_length": 372.7738342285156, "completions/min_length": 0.0, "completions/min_terminated_length": 133.0, "epoch": 0.1824, "grad_norm": 0.012922896072268486, "learning_rate": 8.055555555555557e-07, "loss": 0.0027, "num_tokens": 40109824.0, "reward": 1.1314560174942017, "reward_std": 0.30215245485305786, "rewards/accuracy_reward_step": 0.57421875, "rewards/final_brier_reward_step": 0.6065089702606201, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.6224400401115417, "step": 171 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5783914936130735, "calib/avg_num_step_conf": 0.98828125, "calib/ece": 0.2826377952755905, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.937007874015748, "calib/gap": 0.06965746092913738, "calib/mean_conf": 0.963740157480315, "calib/mu_c": 0.9859537572254334, "calib/mu_w": 0.916296296296296, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.2826377952755905, "calib/std_conf": 0.10689509999585951, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2530.0, "completions/max_terminated_length": 2530.0, "completions/mean_length": 387.7578125, "completions/mean_terminated_length": 390.81103515625, "completions/min_length": 0.0, "completions/min_terminated_length": 121.0, "epoch": 0.18346666666666667, "grad_norm": 0.013554791919887066, "learning_rate": 7.777777777777779e-07, "loss": 0.0074, "num_tokens": 40312442.0, "reward": 1.287895679473877, "reward_std": 0.31955093145370483, "rewards/accuracy_reward_step": 0.67578125, "rewards/final_brier_reward_step": 0.716030478477478, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.7441458702087402, "step": 172 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5242834394904459, "calib/avg_num_step_conf": 0.9921875, "calib/ece": 0.36363636363636365, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.932806324110672, "calib/gap": 0.007741507430997774, "calib/mean_conf": 0.9543873517786563, "calib/mu_c": 0.9573248407643311, "calib/mu_w": 0.9495833333333333, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.3487351778656127, "calib/std_conf": 0.14200470458617728, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2510.0, "completions/max_terminated_length": 2510.0, "completions/mean_length": 438.78515625, "completions/mean_terminated_length": 443.9881591796875, "completions/min_length": 0.0, "completions/min_terminated_length": 89.0, "epoch": 0.18453333333333333, "grad_norm": 0.015299421735107899, "learning_rate": 7.5e-07, "loss": 0.0104, "num_tokens": 40527931.0, "reward": 1.1803079843521118, "reward_std": 0.27851802110671997, "rewards/accuracy_reward_step": 0.61328125, "rewards/final_brier_reward_step": 0.6251249313354492, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.6625131964683533, "step": 173 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5098488762593645, "calib/avg_num_step_conf": 0.984375, "calib/ece": 0.3548437500000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.84375, "calib/gap": -0.00232498062516151, "calib/mean_conf": 0.9246875000000001, "calib/mu_c": 0.9237974683544302, "calib/mu_w": 0.9261224489795917, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.3311718750000001, "calib/std_conf": 0.16546543020749077, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2311.0, "completions/max_terminated_length": 2311.0, "completions/mean_length": 431.4609375, "completions/mean_terminated_length": 438.3095397949219, "completions/min_length": 0.0, "completions/min_terminated_length": 122.0, "epoch": 0.1856, "grad_norm": 0.023088471964001656, "learning_rate": 7.222222222222222e-07, "loss": 0.0267, "num_tokens": 40742617.0, "reward": 1.1954487562179565, "reward_std": 0.3647494912147522, "rewards/accuracy_reward_step": 0.6171875, "rewards/final_brier_reward_step": 0.6400851607322693, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.6885848045349121, "step": 174 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.6150722915428798, "calib/avg_num_step_conf": 0.9765625, "calib/ece": 0.32654761904761903, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.8253968253968254, "calib/gap": 0.08884729649435519, "calib/mean_conf": 0.9131349206349207, "calib/mu_c": 0.9480392156862746, "calib/mu_w": 0.8591919191919194, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.31626984126984126, "calib/std_conf": 0.18503778478035848, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2231.0, "completions/max_terminated_length": 2231.0, "completions/mean_length": 442.9375, "completions/mean_terminated_length": 449.9682922363281, "completions/min_length": 0.0, "completions/min_terminated_length": 108.0, "epoch": 0.18666666666666668, "grad_norm": 0.016742026433348656, "learning_rate": 6.944444444444446e-07, "loss": 0.0001, "num_tokens": 40961833.0, "reward": 1.1773141622543335, "reward_std": 0.25273728370666504, "rewards/accuracy_reward_step": 0.59765625, "rewards/final_brier_reward_step": 0.6652753353118896, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.6767937541007996, "step": 175 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6031512605042016, "calib/avg_num_step_conf": 0.99609375, "calib/ece": 0.2814173228346457, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.8858267716535433, "calib/gap": 0.07469747899159684, "calib/mean_conf": 0.9507086614173229, "calib/mu_c": 0.9754117647058823, "calib/mu_w": 0.9007142857142855, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.2814173228346457, "calib/std_conf": 0.11365911946162029, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2432.0, "completions/max_terminated_length": 2432.0, "completions/mean_length": 412.796875, "completions/mean_terminated_length": 417.69171142578125, "completions/min_length": 0.0, "completions/min_terminated_length": 129.0, "epoch": 0.18773333333333334, "grad_norm": 0.010885723866522312, "learning_rate": 6.666666666666667e-07, "loss": -0.0247, "num_tokens": 41171573.0, "reward": 1.281128168106079, "reward_std": 0.2560807764530182, "rewards/accuracy_reward_step": 0.6640625, "rewards/final_brier_reward_step": 0.7139906287193298, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.7620842456817627, "step": 176 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5999312005503956, "calib/avg_num_step_conf": 1.0, "calib/ece": 0.30472656249999996, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.93359375, "calib/gap": 0.05555349157206768, "calib/mean_conf": 0.9674609375000001, "calib/mu_c": 0.9859064327485381, "calib/mu_w": 0.9303529411764704, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.30210937499999996, "calib/std_conf": 0.08658561535625356, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1088.0, "completions/max_terminated_length": 1088.0, "completions/mean_length": 384.0546875, "completions/mean_terminated_length": 390.15081787109375, "completions/min_length": 0.0, "completions/min_terminated_length": 127.0, "epoch": 0.1888, "grad_norm": 0.011047380976378918, "learning_rate": 6.388888888888889e-07, "loss": 0.0004, "num_tokens": 41373723.0, "reward": 1.278724193572998, "reward_std": 0.29079896211624146, "rewards/accuracy_reward_step": 0.66796875, "rewards/final_brier_reward_step": 0.7056628465652466, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.7373589873313904, "step": 177 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5691667882679119, "calib/avg_num_step_conf": 1.0, "calib/ece": 0.2768627450980393, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.9529411764705882, "calib/gap": 0.045080256821830034, "calib/mean_conf": 0.9700392156862745, "calib/mu_c": 0.9836516853932584, "calib/mu_w": 0.9385714285714284, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.27443137254901967, "calib/std_conf": 0.09140943461503169, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2289.0, "completions/max_terminated_length": 2289.0, "completions/mean_length": 391.7421875, "completions/mean_terminated_length": 396.3873596191406, "completions/min_length": 0.0, "completions/min_terminated_length": 84.0, "epoch": 0.18986666666666666, "grad_norm": 0.011059127748012543, "learning_rate": 6.111111111111112e-07, "loss": -0.0017, "num_tokens": 41580081.0, "reward": 1.3147163391113281, "reward_std": 0.24289332330226898, "rewards/accuracy_reward_step": 0.6953125, "rewards/final_brier_reward_step": 0.7230484485626221, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.7584728002548218, "step": 178 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5800461065573771, "calib/avg_num_step_conf": 0.98828125, "calib/ece": 0.20083003952569184, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.924901185770751, "calib/gap": 0.06552083333333314, "calib/mean_conf": 0.9597233201581028, "calib/mu_c": 0.9755208333333334, "calib/mu_w": 0.9100000000000003, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.20083003952569184, "calib/std_conf": 0.10948461065375452, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2217.0, "completions/max_terminated_length": 2217.0, "completions/mean_length": 387.953125, "completions/mean_terminated_length": 395.6812744140625, "completions/min_length": 0.0, "completions/min_terminated_length": 150.0, "epoch": 0.19093333333333334, "grad_norm": 0.015431476756930351, "learning_rate": 5.833333333333334e-07, "loss": -0.0172, "num_tokens": 41785661.0, "reward": 1.3875930309295654, "reward_std": 0.2205352485179901, "rewards/accuracy_reward_step": 0.75, "rewards/final_brier_reward_step": 0.7794409990310669, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.7826500535011292, "step": 179 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5692847543676274, "calib/avg_num_step_conf": 0.97265625, "calib/ece": 0.2600392156862745, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.9568627450980393, "calib/gap": 0.06260564431835158, "calib/mean_conf": 0.969843137254902, "calib/mu_c": 0.9880110497237569, "calib/mu_w": 0.9254054054054053, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.97265625, "calib/pce": 0.2600392156862745, "calib/std_conf": 0.10153708345056905, "calib/step_conf_rate": 0.97265625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2228.0, "completions/max_terminated_length": 2228.0, "completions/mean_length": 486.34375, "completions/mean_terminated_length": 492.1106872558594, "completions/min_length": 0.0, "completions/min_terminated_length": 146.0, "epoch": 0.192, "grad_norm": 0.009874767623841763, "learning_rate": 5.555555555555555e-07, "loss": 0.0028, "num_tokens": 42014021.0, "reward": 1.334794521331787, "reward_std": 0.19724637269973755, "rewards/accuracy_reward_step": 0.70703125, "rewards/final_brier_reward_step": 0.7308593988418579, "rewards/format_reward_step": 0.97265625, "rewards/stepwise_brier_reward": 0.8075376152992249, "step": 180 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5384439359267734, "calib/avg_num_step_conf": 1.00390625, "calib/ece": 0.34425781250000015, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.95703125, "calib/gap": 0.029802549852893434, "calib/mean_conf": 0.9731640625000001, "calib/mu_c": 0.984223602484472, "calib/mu_w": 0.9544210526315786, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.34425781250000015, "calib/std_conf": 0.0815508159584936, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 922.0, "completions/max_terminated_length": 922.0, "completions/mean_length": 351.8203125, "completions/mean_terminated_length": 357.40478515625, "completions/min_length": 0.0, "completions/min_terminated_length": 122.0, "epoch": 0.19306666666666666, "grad_norm": 0.015213570557534695, "learning_rate": 5.277777777777779e-07, "loss": -0.0068, "num_tokens": 42210351.0, "reward": 1.2136608362197876, "reward_std": 0.34391531348228455, "rewards/accuracy_reward_step": 0.62890625, "rewards/final_brier_reward_step": 0.6553636789321899, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.68365478515625, "step": 181 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.58125, "calib/avg_num_step_conf": 0.99609375, "calib/ece": 0.284609375, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.94921875, "calib/gap": 0.05725000000000002, "calib/mean_conf": 0.972109375, "calib/mu_c": 0.9900000000000001, "calib/mu_w": 0.9327500000000001, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.284609375, "calib/std_conf": 0.08250144263653439, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1144.0, "completions/max_terminated_length": 1144.0, "completions/mean_length": 391.90234375, "completions/mean_terminated_length": 398.123046875, "completions/min_length": 0.0, "completions/min_terminated_length": 161.0, "epoch": 0.19413333333333332, "grad_norm": 0.017016498371958733, "learning_rate": 5.000000000000001e-07, "loss": -0.0013, "num_tokens": 42416838.0, "reward": 1.29753839969635, "reward_std": 0.2675790786743164, "rewards/accuracy_reward_step": 0.6875, "rewards/final_brier_reward_step": 0.7218691110610962, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.7221903204917908, "step": 182 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5960775335775337, "calib/avg_num_step_conf": 0.98828125, "calib/ece": 0.24007874015748024, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.905511811023622, "calib/gap": 0.0650549450549448, "calib/mean_conf": 0.9566141732283465, "calib/mu_c": 0.9750549450549449, "calib/mu_w": 0.9100000000000001, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.24007874015748024, "calib/std_conf": 0.10864027524550372, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2330.0, "completions/max_terminated_length": 2330.0, "completions/mean_length": 452.01953125, "completions/mean_terminated_length": 455.5787353515625, "completions/min_length": 0.0, "completions/min_terminated_length": 136.0, "epoch": 0.1952, "grad_norm": 0.013871497474610806, "learning_rate": 4.7222222222222226e-07, "loss": 0.0098, "num_tokens": 42639235.0, "reward": 1.3330657482147217, "reward_std": 0.28158819675445557, "rewards/accuracy_reward_step": 0.7109375, "rewards/final_brier_reward_step": 0.744078516960144, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.7561532258987427, "step": 183 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5681907920713891, "calib/avg_num_step_conf": 1.015625, "calib/ece": 0.23460937499999998, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.90625, "calib/gap": 0.05212193003237764, "calib/mean_conf": 0.9477343750000001, "calib/mu_c": 0.9613756613756614, "calib/mu_w": 0.9092537313432838, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.22203124999999996, "calib/std_conf": 0.14333208187059648, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 952.0, "completions/max_terminated_length": 952.0, "completions/mean_length": 392.296875, "completions/mean_terminated_length": 400.111572265625, "completions/min_length": 0.0, "completions/min_terminated_length": 124.0, "epoch": 0.19626666666666667, "grad_norm": 0.018339090049266815, "learning_rate": 4.444444444444445e-07, "loss": -0.0448, "num_tokens": 42844943.0, "reward": 1.3760201930999756, "reward_std": 0.2757716178894043, "rewards/accuracy_reward_step": 0.73828125, "rewards/final_brier_reward_step": 0.7625054121017456, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.788450300693512, "step": 184 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5971844806763286, "calib/avg_num_step_conf": 0.9921875, "calib/ece": 0.2430078125000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.92578125, "calib/gap": 0.070018115942029, "calib/mean_conf": 0.9594921875, "calib/mu_c": 0.9791847826086958, "calib/mu_w": 0.9091666666666668, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.2418750000000001, "calib/std_conf": 0.11326534775236796, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1172.0, "completions/max_terminated_length": 1172.0, "completions/mean_length": 394.21484375, "completions/mean_terminated_length": 400.4722595214844, "completions/min_length": 0.0, "completions/min_terminated_length": 88.0, "epoch": 0.19733333333333333, "grad_norm": 0.014981143176555634, "learning_rate": 4.1666666666666667e-07, "loss": 0.0006, "num_tokens": 43052782.0, "reward": 1.343614101409912, "reward_std": 0.2797345519065857, "rewards/accuracy_reward_step": 0.71875, "rewards/final_brier_reward_step": 0.7552183270454407, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.7520507574081421, "step": 185 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6204629629629629, "calib/avg_num_step_conf": 0.99609375, "calib/ece": 0.17055118110236234, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.8858267716535433, "calib/gap": 0.09249999999999992, "calib/mean_conf": 0.9428346456692914, "calib/mu_c": 0.9625, "calib/mu_w": 0.8700000000000001, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.16299212598425208, "calib/std_conf": 0.1334248889990245, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 1760.0, "completions/max_terminated_length": 1760.0, "completions/mean_length": 406.8359375, "completions/mean_terminated_length": 411.66009521484375, "completions/min_length": 0.0, "completions/min_terminated_length": 132.0, "epoch": 0.1984, "grad_norm": 0.011278253979980946, "learning_rate": 3.8888888888888895e-07, "loss": 0.0058, "num_tokens": 43261972.0, "reward": 1.441054105758667, "reward_std": 0.22419771552085876, "rewards/accuracy_reward_step": 0.78125, "rewards/final_brier_reward_step": 0.8151882886886597, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8318405747413635, "step": 186 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5388257575757576, "calib/avg_num_step_conf": 0.99609375, "calib/ece": 0.26692913385826783, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.9173228346456693, "calib/gap": 0.027693764568764823, "calib/mean_conf": 0.955984251968504, "calib/mu_c": 0.9644886363636364, "calib/mu_w": 0.9367948717948715, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.26500000000000007, "calib/std_conf": 0.12007386075022351, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2282.0, "completions/max_terminated_length": 2282.0, "completions/mean_length": 473.43359375, "completions/mean_terminated_length": 479.0474548339844, "completions/min_length": 0.0, "completions/min_terminated_length": 107.0, "epoch": 0.19946666666666665, "grad_norm": 0.012535003013908863, "learning_rate": 3.611111111111111e-07, "loss": -0.0152, "num_tokens": 43484715.0, "reward": 1.2956682443618774, "reward_std": 0.36760303378105164, "rewards/accuracy_reward_step": 0.6875, "rewards/final_brier_reward_step": 0.7097882628440857, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.7306970953941345, "step": 187 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5494287820158468, "calib/avg_num_step_conf": 0.98828125, "calib/ece": 0.19082352941176478, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.9450980392156862, "calib/gap": 0.05154228855721377, "calib/mean_conf": 0.967294117647059, "calib/mu_c": 0.9782089552238806, "calib/mu_w": 0.9266666666666669, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.1849411764705883, "calib/std_conf": 0.09701491579631738, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1235.0, "completions/max_terminated_length": 1235.0, "completions/mean_length": 437.33984375, "completions/mean_terminated_length": 444.2817687988281, "completions/min_length": 0.0, "completions/min_terminated_length": 136.0, "epoch": 0.20053333333333334, "grad_norm": 0.009285026229918003, "learning_rate": 3.3333333333333335e-07, "loss": -0.0108, "num_tokens": 43700746.0, "reward": 1.4380080699920654, "reward_std": 0.22062596678733826, "rewards/accuracy_reward_step": 0.78515625, "rewards/final_brier_reward_step": 0.7978414297103882, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.8252846598625183, "step": 188 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6048967459324155, "calib/avg_num_step_conf": 1.00390625, "calib/ece": 0.22191406249999993, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.91015625, "calib/gap": 0.09086670838548161, "calib/mean_conf": 0.9487890625000002, "calib/mu_c": 0.9729255319148936, "calib/mu_w": 0.882058823529412, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.21816406249999992, "calib/std_conf": 0.13962456089589356, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 938.0, "completions/max_terminated_length": 938.0, "completions/mean_length": 345.25390625, "completions/mean_terminated_length": 350.7341613769531, "completions/min_length": 0.0, "completions/min_terminated_length": 136.0, "epoch": 0.2016, "grad_norm": 0.0154718067497015, "learning_rate": 3.055555555555556e-07, "loss": -0.0036, "num_tokens": 43896899.0, "reward": 1.3746814727783203, "reward_std": 0.24886444211006165, "rewards/accuracy_reward_step": 0.734375, "rewards/final_brier_reward_step": 0.7749136686325073, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.7863121032714844, "step": 189 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6176430976430977, "calib/avg_num_step_conf": 0.99609375, "calib/ece": 0.2865098039215686, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.8627450980392157, "calib/gap": 0.10253535353535337, "calib/mean_conf": 0.9335686274509803, "calib/mu_c": 0.9697575757575757, "calib/mu_w": 0.8672222222222223, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.2865098039215686, "calib/std_conf": 0.15023449055732196, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2168.0, "completions/max_terminated_length": 2168.0, "completions/mean_length": 431.15625, "completions/mean_terminated_length": 436.268798828125, "completions/min_length": 0.0, "completions/min_terminated_length": 116.0, "epoch": 0.20266666666666666, "grad_norm": 0.011575455777347088, "learning_rate": 2.7777777777777776e-07, "loss": 0.0006, "num_tokens": 44112883.0, "reward": 1.2472643852233887, "reward_std": 0.287927508354187, "rewards/accuracy_reward_step": 0.64453125, "rewards/final_brier_reward_step": 0.7110124826431274, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.7038266658782959, "step": 190 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5946714612618502, "calib/avg_num_step_conf": 0.9921875, "calib/ece": 0.3163281250000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.875, "calib/gap": 0.07856554429552176, "calib/mean_conf": 0.9435156250000001, "calib/mu_c": 0.9726708074534163, "calib/mu_w": 0.8941052631578945, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.3154687500000001, "calib/std_conf": 0.12701556156967292, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1252.0, "completions/max_terminated_length": 1252.0, "completions/mean_length": 409.015625, "completions/mean_terminated_length": 415.5079650878906, "completions/min_length": 0.0, "completions/min_terminated_length": 133.0, "epoch": 0.20373333333333332, "grad_norm": 0.013394687324762344, "learning_rate": 2.5000000000000004e-07, "loss": 0.0111, "num_tokens": 44321759.0, "reward": 1.216658353805542, "reward_std": 0.21099448204040527, "rewards/accuracy_reward_step": 0.62890625, "rewards/final_brier_reward_step": 0.6835765838623047, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.6752440929412842, "step": 191 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6106445672191528, "calib/avg_num_step_conf": 1.0, "calib/ece": 0.24542968750000005, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.90625, "calib/gap": 0.08966261510128892, "calib/mean_conf": 0.9524609375000002, "calib/mu_c": 0.9787292817679557, "calib/mu_w": 0.8890666666666668, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.24542968750000005, "calib/std_conf": 0.12128784218387718, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1341.0, "completions/max_terminated_length": 1341.0, "completions/mean_length": 400.14453125, "completions/mean_terminated_length": 406.4960632324219, "completions/min_length": 0.0, "completions/min_terminated_length": 117.0, "epoch": 0.2048, "grad_norm": 0.012041359208524227, "learning_rate": 2.2222222222222224e-07, "loss": 0.0196, "num_tokens": 44529172.0, "reward": 1.3370025157928467, "reward_std": 0.27880859375, "rewards/accuracy_reward_step": 0.70703125, "rewards/final_brier_reward_step": 0.7550605535507202, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.7648242712020874, "step": 192 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5994388620644656, "calib/avg_num_step_conf": 0.9921875, "calib/ece": 0.34203921568627454, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.9098039215686274, "calib/gap": 0.07824481273652617, "calib/mean_conf": 0.9546666666666667, "calib/mu_c": 0.9844303797468353, "calib/mu_w": 0.9061855670103092, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.3385490196078432, "calib/std_conf": 0.11702097460170131, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1797.0, "completions/max_terminated_length": 1797.0, "completions/mean_length": 402.5546875, "completions/mean_terminated_length": 408.9444580078125, "completions/min_length": 0.0, "completions/min_terminated_length": 130.0, "epoch": 0.20586666666666667, "grad_norm": 0.015237949788570404, "learning_rate": 1.9444444444444447e-07, "loss": 0.023, "num_tokens": 44737938.0, "reward": 1.2062556743621826, "reward_std": 0.2972811758518219, "rewards/accuracy_reward_step": 0.6171875, "rewards/final_brier_reward_step": 0.668610155582428, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.6993814706802368, "step": 193 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6618576322801675, "calib/avg_num_step_conf": 1.00390625, "calib/ece": 0.2236328125000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.87890625, "calib/gap": 0.12279634564141584, "calib/mean_conf": 0.9404296875000001, "calib/mu_c": 0.9744864864864864, "calib/mu_w": 0.8516901408450706, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.22070312500000014, "calib/std_conf": 0.14121753961053257, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 948.0, "completions/max_terminated_length": 948.0, "completions/mean_length": 373.765625, "completions/mean_terminated_length": 379.69842529296875, "completions/min_length": 0.0, "completions/min_terminated_length": 175.0, "epoch": 0.20693333333333333, "grad_norm": 0.010832453146576881, "learning_rate": 1.6666666666666668e-07, "loss": -0.0044, "num_tokens": 44939566.0, "reward": 1.3566707372665405, "reward_std": 0.17240163683891296, "rewards/accuracy_reward_step": 0.72265625, "rewards/final_brier_reward_step": 0.7775343656539917, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.7624297738075256, "step": 194 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5736613119143239, "calib/avg_num_step_conf": 1.0, "calib/ece": 0.3185156250000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.9375, "calib/gap": 0.0459651941097724, "calib/mean_conf": 0.96625, "calib/mu_c": 0.9824096385542168, "calib/mu_w": 0.9364444444444444, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.31816406250000007, "calib/std_conf": 0.09477357886035538, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1007.0, "completions/max_terminated_length": 1007.0, "completions/mean_length": 399.56640625, "completions/mean_terminated_length": 405.90875244140625, "completions/min_length": 0.0, "completions/min_terminated_length": 131.0, "epoch": 0.208, "grad_norm": 0.015377230010926723, "learning_rate": 1.3888888888888888e-07, "loss": 0.0098, "num_tokens": 45147839.0, "reward": 1.2390071153640747, "reward_std": 0.23279809951782227, "rewards/accuracy_reward_step": 0.6484375, "rewards/final_brier_reward_step": 0.6790202856063843, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.6910707354545593, "step": 195 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5823187229437229, "calib/avg_num_step_conf": 1.0078125, "calib/ece": 0.31113281250000013, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.94140625, "calib/gap": 0.06042748917748897, "calib/mean_conf": 0.9673828125000001, "calib/mu_c": 0.9881547619047618, "calib/mu_w": 0.9277272727272728, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.31113281250000013, "calib/std_conf": 0.1007888123979534, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1104.0, "completions/max_terminated_length": 1104.0, "completions/mean_length": 331.3671875, "completions/mean_terminated_length": 336.62701416015625, "completions/min_length": 0.0, "completions/min_terminated_length": 143.0, "epoch": 0.20906666666666668, "grad_norm": 0.013079122640192509, "learning_rate": 1.1111111111111112e-07, "loss": -0.0045, "num_tokens": 45335213.0, "reward": 1.2558143138885498, "reward_std": 0.1615125834941864, "rewards/accuracy_reward_step": 0.65625, "rewards/final_brier_reward_step": 0.6947152018547058, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.7035418748855591, "step": 196 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5854145166838666, "calib/avg_num_step_conf": 1.00390625, "calib/ece": 0.289609375, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.92578125, "calib/gap": 0.07422222222222219, "calib/mean_conf": 0.9575781250000002, "calib/mu_c": 0.9822222222222221, "calib/mu_w": 0.9079999999999999, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.289609375, "calib/std_conf": 0.1220799308710665, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1314.0, "completions/max_terminated_length": 1314.0, "completions/mean_length": 385.578125, "completions/mean_terminated_length": 391.69842529296875, "completions/min_length": 0.0, "completions/min_terminated_length": 151.0, "epoch": 0.21013333333333334, "grad_norm": 0.01792999915778637, "learning_rate": 8.333333333333334e-08, "loss": -0.0136, "num_tokens": 45538977.0, "reward": 1.2794477939605713, "reward_std": 0.3353152275085449, "rewards/accuracy_reward_step": 0.66796875, "rewards/final_brier_reward_step": 0.7123593091964722, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.7335565090179443, "step": 197 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6269806972054163, "calib/avg_num_step_conf": 0.99609375, "calib/ece": 0.25910156250000005, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.8828125, "calib/gap": 0.09276145203111508, "calib/mean_conf": 0.9448828125000001, "calib/mu_c": 0.9731460674157305, "calib/mu_w": 0.8803846153846154, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.25433593750000005, "calib/std_conf": 0.13228160781866025, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 940.0, "completions/max_terminated_length": 940.0, "completions/mean_length": 351.01953125, "completions/mean_terminated_length": 356.5912780761719, "completions/min_length": 0.0, "completions/min_terminated_length": 116.0, "epoch": 0.2112, "grad_norm": 0.012769196182489395, "learning_rate": 5.555555555555556e-08, "loss": -0.004, "num_tokens": 45734222.0, "reward": 1.3227304220199585, "reward_std": 0.24605156481266022, "rewards/accuracy_reward_step": 0.6953125, "rewards/final_brier_reward_step": 0.7475889921188354, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.7659890651702881, "step": 198 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6333333333333333, "calib/avg_num_step_conf": 0.9921875, "calib/ece": 0.2520703125, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.921875, "calib/gap": 0.1054666666666666, "calib/mean_conf": 0.9591015625000001, "calib/mu_c": 0.99, "calib/mu_w": 0.8845333333333334, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.2520703125, "calib/std_conf": 0.10994127266435745, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1558.0, "completions/max_terminated_length": 1558.0, "completions/mean_length": 409.1953125, "completions/mean_terminated_length": 415.69049072265625, "completions/min_length": 0.0, "completions/min_terminated_length": 118.0, "epoch": 0.21226666666666666, "grad_norm": 0.017679810523986816, "learning_rate": 2.777777777777778e-08, "loss": 0.0004, "num_tokens": 45943176.0, "reward": 1.342065453529358, "reward_std": 0.27533435821533203, "rewards/accuracy_reward_step": 0.70703125, "rewards/final_brier_reward_step": 0.7607722282409668, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.787177324295044, "step": 199 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5451907790143085, "calib/avg_num_step_conf": 0.98828125, "calib/ece": 0.2427667984189725, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.9525691699604744, "calib/gap": 0.039386327503974417, "calib/mean_conf": 0.9670355731225297, "calib/mu_c": 0.9776216216216217, "calib/mu_w": 0.9382352941176473, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.2392885375494073, "calib/std_conf": 0.11110435005059703, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1861.0, "completions/max_terminated_length": 1861.0, "completions/mean_length": 431.7578125, "completions/mean_terminated_length": 438.61114501953125, "completions/min_length": 0.0, "completions/min_terminated_length": 98.0, "epoch": 0.21333333333333335, "grad_norm": 0.009260621853172779, "learning_rate": 0.0, "loss": -0.0021, "num_tokens": 46161754.0, "reward": 1.345501184463501, "reward_std": 0.16192062199115753, "rewards/accuracy_reward_step": 0.72265625, "rewards/final_brier_reward_step": 0.7421952486038208, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.7609031200408936, "step": 200 }, { "epoch": 0.21333333333333335, "step": 200, "total_flos": 0.0, "train_loss": 0.0005728554155211895, "train_runtime": 9069.3768, "train_samples_per_second": 5.645, "train_steps_per_second": 0.022 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 46161754, "num_train_epochs": 1, "save_steps": 20, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }